-rw-r--r--  Documentation/ABI/testing/sysfs-firmware-memmap | 71
-rw-r--r--  Documentation/kdump/kdump.txt | 2
-rw-r--r--  Documentation/kernel-parameters.txt | 37
-rw-r--r--  Documentation/nmi_watchdog.txt | 16
-rw-r--r--  Documentation/x86/i386/IO-APIC.txt (renamed from Documentation/i386/IO-APIC.txt) | 0
-rw-r--r--  Documentation/x86/i386/boot.txt (renamed from Documentation/i386/boot.txt) | 79
-rw-r--r--  Documentation/x86/i386/usb-legacy-support.txt (renamed from Documentation/i386/usb-legacy-support.txt) | 0
-rw-r--r--  Documentation/x86/i386/zero-page.txt (renamed from Documentation/i386/zero-page.txt) | 0
-rw-r--r--  Documentation/x86/x86_64/00-INDEX (renamed from Documentation/x86_64/00-INDEX) | 0
-rw-r--r--  Documentation/x86/x86_64/boot-options.txt (renamed from Documentation/x86_64/boot-options.txt) | 0
-rw-r--r--  Documentation/x86/x86_64/cpu-hotplug-spec (renamed from Documentation/x86_64/cpu-hotplug-spec) | 0
-rw-r--r--  Documentation/x86/x86_64/fake-numa-for-cpusets (renamed from Documentation/x86_64/fake-numa-for-cpusets) | 0
-rw-r--r--  Documentation/x86/x86_64/kernel-stacks (renamed from Documentation/x86_64/kernel-stacks) | 0
-rw-r--r--  Documentation/x86/x86_64/machinecheck (renamed from Documentation/x86_64/machinecheck) | 0
-rw-r--r--  Documentation/x86/x86_64/mm.txt (renamed from Documentation/x86_64/mm.txt) | 5
-rw-r--r--  Documentation/x86/x86_64/uefi.txt (renamed from Documentation/x86_64/uefi.txt) | 4
-rw-r--r--  MAINTAINERS | 6
-rw-r--r--  arch/x86/Kconfig | 280
-rw-r--r--  arch/x86/Kconfig.cpu | 6
-rw-r--r--  arch/x86/Kconfig.debug | 19
-rw-r--r--  arch/x86/Makefile | 27
-rw-r--r--  arch/x86/boot/a20.c | 5
-rw-r--r--  arch/x86/boot/compressed/head_64.S | 5
-rw-r--r--  arch/x86/boot/compressed/misc.c | 59
-rw-r--r--  arch/x86/boot/compressed/relocs.c | 198
-rw-r--r--  arch/x86/boot/cpu.c | 2
-rw-r--r--  arch/x86/boot/main.c | 4
-rw-r--r--  arch/x86/boot/memory.c | 3
-rw-r--r--  arch/x86/boot/pmjump.S | 4
-rw-r--r--  arch/x86/boot/video-vga.c | 3
-rw-r--r--  arch/x86/configs/i386_defconfig | 1711
-rw-r--r--  arch/x86/configs/x86_64_defconfig | 1735
-rw-r--r--  arch/x86/ia32/ia32entry.S | 68
-rw-r--r--  arch/x86/kernel/Makefile | 21
-rw-r--r--  arch/x86/kernel/acpi/boot.c | 443
-rw-r--r--  arch/x86/kernel/acpi/sleep.c | 4
-rw-r--r--  arch/x86/kernel/amd_iommu.c | 962
-rw-r--r--  arch/x86/kernel/amd_iommu_init.c | 875
-rw-r--r--  arch/x86/kernel/aperture_64.c | 313
-rw-r--r--  arch/x86/kernel/apic_32.c | 123
-rw-r--r--  arch/x86/kernel/apic_64.c | 68
-rw-r--r--  arch/x86/kernel/apm_32.c | 25
-rw-r--r--  arch/x86/kernel/asm-offsets_32.c | 2
-rw-r--r--  arch/x86/kernel/asm-offsets_64.c | 7
-rw-r--r--  arch/x86/kernel/cpu/Makefile | 4
-rw-r--r--  arch/x86/kernel/cpu/addon_cpuid_features.c | 17
-rw-r--r--  arch/x86/kernel/cpu/amd.c | 42
-rw-r--r--  arch/x86/kernel/cpu/amd_64.c | 221
-rw-r--r--  arch/x86/kernel/cpu/bugs.c | 27
-rw-r--r--  arch/x86/kernel/cpu/bugs_64.c (renamed from arch/x86/kernel/bugs_64.c) | 0
-rw-r--r--  arch/x86/kernel/cpu/centaur_64.c | 45
-rw-r--r--  arch/x86/kernel/cpu/common.c | 2
-rw-r--r--  arch/x86/kernel/cpu/common_64.c | 679
-rw-r--r--  arch/x86/kernel/cpu/cpu.h | 5
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c | 44
-rw-r--r--  arch/x86/kernel/cpu/intel_64.c | 105
-rw-r--r--  arch/x86/kernel/cpu/intel_cacheinfo.c | 1
-rw-r--r--  arch/x86/kernel/cpu/mcheck/k7.c | 36
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce_64.c | 24
-rw-r--r--  arch/x86/kernel/cpu/mcheck/p4.c | 90
-rw-r--r--  arch/x86/kernel/cpu/mtrr/generic.c | 38
-rw-r--r--  arch/x86/kernel/cpu/mtrr/main.c | 901
-rw-r--r--  arch/x86/kernel/cpu/mtrr/mtrr.h | 3
-rw-r--r--  arch/x86/kernel/cpu/perfctr-watchdog.c | 206
-rw-r--r--  arch/x86/kernel/e820.c | 1405
-rw-r--r--  arch/x86/kernel/e820_32.c | 775
-rw-r--r--  arch/x86/kernel/e820_64.c | 952
-rw-r--r--  arch/x86/kernel/early-quirks.c | 15
-rw-r--r--  arch/x86/kernel/efi.c | 67
-rw-r--r--  arch/x86/kernel/efi_64.c | 8
-rw-r--r--  arch/x86/kernel/entry_32.S | 11
-rw-r--r--  arch/x86/kernel/entry_64.S | 61
-rw-r--r--  arch/x86/kernel/genapic_64.c | 2
-rw-r--r--  arch/x86/kernel/genx2apic_uv_x.c | 214
-rw-r--r--  arch/x86/kernel/head.c | 55
-rw-r--r--  arch/x86/kernel/head32.c | 27
-rw-r--r--  arch/x86/kernel/head64.c | 96
-rw-r--r--  arch/x86/kernel/head_32.S | 13
-rw-r--r--  arch/x86/kernel/head_64.S | 99
-rw-r--r--  arch/x86/kernel/hpet.c | 43
-rw-r--r--  arch/x86/kernel/i387.c | 4
-rw-r--r--  arch/x86/kernel/i8259.c (renamed from arch/x86/kernel/i8259_32.c) | 136
-rw-r--r--  arch/x86/kernel/i8259_64.c | 512
-rw-r--r--  arch/x86/kernel/io_apic_32.c | 686
-rw-r--r--  arch/x86/kernel/io_apic_64.c | 290
-rw-r--r--  arch/x86/kernel/ipi.c | 1
-rw-r--r--  arch/x86/kernel/irq_32.c | 254
-rw-r--r--  arch/x86/kernel/irq_64.c | 28
-rw-r--r--  arch/x86/kernel/irqinit_32.c | 114
-rw-r--r--  arch/x86/kernel/irqinit_64.c | 217
-rw-r--r--  arch/x86/kernel/ldt.c | 6
-rw-r--r--  arch/x86/kernel/machine_kexec_32.c | 4
-rw-r--r--  arch/x86/kernel/machine_kexec_64.c | 2
-rw-r--r--  arch/x86/kernel/microcode.c | 29
-rw-r--r--  arch/x86/kernel/mmconf-fam10h_64.c | 1
-rw-r--r--  arch/x86/kernel/mpparse.c | 847
-rw-r--r--  arch/x86/kernel/nmi.c (renamed from arch/x86/kernel/nmi_32.c) | 227
-rw-r--r--  arch/x86/kernel/nmi_64.c | 482
-rw-r--r--  arch/x86/kernel/numaq_32.c | 25
-rw-r--r--  arch/x86/kernel/paravirt.c | 36
-rw-r--r--  arch/x86/kernel/paravirt_patch_32.c | 4
-rw-r--r--  arch/x86/kernel/paravirt_patch_64.c | 9
-rw-r--r--  arch/x86/kernel/pci-calgary_64.c | 4
-rw-r--r--  arch/x86/kernel/pci-dma.c | 22
-rw-r--r--  arch/x86/kernel/pci-gart_64.c | 87
-rw-r--r--  arch/x86/kernel/pci-swiotlb_64.c | 2
-rw-r--r--  arch/x86/kernel/probe_roms_32.c | 166
-rw-r--r--  arch/x86/kernel/process.c | 190
-rw-r--r--  arch/x86/kernel/process_32.c | 65
-rw-r--r--  arch/x86/kernel/process_64.c | 85
-rw-r--r--  arch/x86/kernel/ptrace.c | 4
-rw-r--r--  arch/x86/kernel/quirks.c | 58
-rw-r--r--  arch/x86/kernel/reboot.c | 18
-rw-r--r--  arch/x86/kernel/reboot_fixups_32.c | 4
-rw-r--r--  arch/x86/kernel/setup.c | 916
-rw-r--r--  arch/x86/kernel/setup64.c | 287
-rw-r--r--  arch/x86/kernel/setup_32.c | 964
-rw-r--r--  arch/x86/kernel/setup_64.c | 1194
-rw-r--r--  arch/x86/kernel/setup_percpu.c | 399
-rw-r--r--  arch/x86/kernel/smpboot.c | 212
-rw-r--r--  arch/x86/kernel/summit_32.c | 2
-rw-r--r--  arch/x86/kernel/sys_i386_32.c | 64
-rw-r--r--  arch/x86/kernel/time_32.c | 6
-rw-r--r--  arch/x86/kernel/time_64.c | 16
-rw-r--r--  arch/x86/kernel/tlb_64.c | 5
-rw-r--r--  arch/x86/kernel/tlb_uv.c | 792
-rw-r--r--  arch/x86/kernel/trampoline.c | 2
-rw-r--r--  arch/x86/kernel/traps_32.c | 190
-rw-r--r--  arch/x86/kernel/traps_64.c | 516
-rw-r--r--  arch/x86/kernel/tsc.c | 533
-rw-r--r--  arch/x86/kernel/tsc_32.c | 451
-rw-r--r--  arch/x86/kernel/tsc_64.c | 357
-rw-r--r--  arch/x86/kernel/visws_quirks.c | 709
-rw-r--r--  arch/x86/kernel/vmi_32.c | 6
-rw-r--r--  arch/x86/kernel/vmiclock_32.c | 7
-rw-r--r--  arch/x86/kernel/vmlinux_32.lds.S | 7
-rw-r--r--  arch/x86/kernel/vmlinux_64.lds.S | 8
-rw-r--r--  arch/x86/kernel/vsmp_64.c | 3
-rw-r--r--  arch/x86/kernel/vsyscall_64.c | 12
-rw-r--r--  arch/x86/kernel/x8664_ksyms_64.c | 5
-rw-r--r--  arch/x86/lguest/Kconfig | 2
-rw-r--r--  arch/x86/lguest/boot.c | 11
-rw-r--r--  arch/x86/lib/Makefile | 4
-rw-r--r--  arch/x86/lib/copy_user_64.S | 429
-rw-r--r--  arch/x86/lib/copy_user_nocache_64.S | 283
-rw-r--r--  arch/x86/lib/delay.c (renamed from arch/x86/lib/delay_32.c) | 38
-rw-r--r--  arch/x86/lib/delay_64.c | 85
-rw-r--r--  arch/x86/lib/getuser.S (renamed from arch/x86/lib/getuser_64.S) | 87
-rw-r--r--  arch/x86/lib/getuser_32.S | 78
-rw-r--r--  arch/x86/lib/putuser.S (renamed from arch/x86/lib/putuser_32.S) | 73
-rw-r--r--  arch/x86/lib/putuser_64.S | 106
-rw-r--r--  arch/x86/lib/usercopy_64.c | 23
-rw-r--r--  arch/x86/mach-default/setup.c | 74
-rw-r--r--  arch/x86/mach-es7000/Makefile | 1
-rw-r--r--  arch/x86/mach-es7000/es7000plat.c | 49
-rw-r--r--  arch/x86/mach-generic/Makefile | 10
-rw-r--r--  arch/x86/mach-generic/bigsmp.c | 4
-rw-r--r--  arch/x86/mach-generic/numaq.c | 41
-rw-r--r--  arch/x86/mach-generic/probe.c | 15
-rw-r--r--  arch/x86/mach-visws/Makefile | 8
-rw-r--r--  arch/x86/mach-visws/mpparse.c | 88
-rw-r--r--  arch/x86/mach-visws/reboot.c | 55
-rw-r--r--  arch/x86/mach-visws/setup.c | 183
-rw-r--r--  arch/x86/mach-visws/traps.c | 69
-rw-r--r--  arch/x86/mach-visws/visws_apic.c | 297
-rw-r--r--  arch/x86/mach-voyager/setup.c | 37
-rw-r--r--  arch/x86/mach-voyager/voyager_smp.c | 14
-rw-r--r--  arch/x86/math-emu/reg_constant.c | 8
-rw-r--r--  arch/x86/mm/Makefile | 3
-rw-r--r--  arch/x86/mm/discontig_32.c | 285
-rw-r--r--  arch/x86/mm/dump_pagetables.c | 2
-rw-r--r--  arch/x86/mm/fault.c | 97
-rw-r--r--  arch/x86/mm/init_32.c | 523
-rw-r--r--  arch/x86/mm/init_64.c | 540
-rw-r--r--  arch/x86/mm/ioremap.c | 26
-rw-r--r--  arch/x86/mm/k8topology_64.c | 21
-rw-r--r--  arch/x86/mm/numa_64.c | 93
-rw-r--r--  arch/x86/mm/pageattr-test.c | 21
-rw-r--r--  arch/x86/mm/pageattr.c | 60
-rw-r--r--  arch/x86/mm/pat.c | 378
-rw-r--r--  arch/x86/mm/pgtable.c | 190
-rw-r--r--  arch/x86/mm/pgtable_32.c | 56
-rw-r--r--  arch/x86/mm/srat_32.c (renamed from arch/x86/kernel/srat_32.c) | 222
-rw-r--r--  arch/x86/mm/srat_64.c | 21
-rw-r--r--  arch/x86/oprofile/nmi_int.c | 3
-rw-r--r--  arch/x86/pci/Makefile | 22
-rw-r--r--  arch/x86/pci/Makefile_32 | 24
-rw-r--r--  arch/x86/pci/Makefile_64 | 17
-rw-r--r--  arch/x86/pci/acpi.c | 21
-rw-r--r--  arch/x86/pci/amd_bus.c (renamed from arch/x86/pci/k8-bus_64.c) | 108
-rw-r--r--  arch/x86/pci/common.c | 4
-rw-r--r--  arch/x86/pci/direct.c | 25
-rw-r--r--  arch/x86/pci/i386.c | 8
-rw-r--r--  arch/x86/pci/init.c | 4
-rw-r--r--  arch/x86/pci/irq.c | 266
-rw-r--r--  arch/x86/pci/legacy.c | 16
-rw-r--r--  arch/x86/pci/mmconfig-shared.c | 2
-rw-r--r--  arch/x86/pci/mp_bus_to_node.c | 23
-rw-r--r--  arch/x86/pci/numa.c | 33
-rw-r--r--  arch/x86/pci/pci.h | 13
-rw-r--r--  arch/x86/pci/visws.c | 28
-rw-r--r--  arch/x86/power/hibernate_64.c | 2
-rw-r--r--  arch/x86/vdso/vdso32-setup.c | 11
-rw-r--r--  arch/x86/vdso/vma.c | 2
-rw-r--r--  arch/x86/xen/Kconfig | 10
-rw-r--r--  arch/x86/xen/Makefile | 2
-rw-r--r--  arch/x86/xen/enlighten.c | 176
-rw-r--r--  arch/x86/xen/manage.c | 143
-rw-r--r--  arch/x86/xen/mmu.c | 270
-rw-r--r--  arch/x86/xen/mmu.h | 12
-rw-r--r--  arch/x86/xen/multicalls.c | 40
-rw-r--r--  arch/x86/xen/multicalls.h | 12
-rw-r--r--  arch/x86/xen/setup.c | 30
-rw-r--r--  arch/x86/xen/smp.c | 8
-rw-r--r--  arch/x86/xen/suspend.c | 45
-rw-r--r--  arch/x86/xen/time.c | 17
-rw-r--r--  arch/x86/xen/xen-head.S | 5
-rw-r--r--  arch/x86/xen/xen-ops.h | 13
-rw-r--r--  drivers/acpi/Kconfig | 2
-rw-r--r--  drivers/base/power/trace.c | 2
-rw-r--r--  drivers/base/topology.c | 25
-rw-r--r--  drivers/char/agp/amd64-agp.c | 85
-rw-r--r--  drivers/char/hvc_xen.c | 61
-rw-r--r--  drivers/firmware/Kconfig | 10
-rw-r--r--  drivers/firmware/Makefile | 1
-rw-r--r--  drivers/firmware/dmi_scan.c | 5
-rw-r--r--  drivers/firmware/memmap.c | 205
-rw-r--r--  drivers/input/xen-kbdfront.c | 20
-rw-r--r--  drivers/lguest/Kconfig | 2
-rw-r--r--  drivers/lguest/lg.h | 1
-rw-r--r--  drivers/pci/intel-iommu.c | 51
-rw-r--r--  drivers/video/sgivwfb.c | 3
-rw-r--r--  drivers/video/xen-fbfront.c | 211
-rw-r--r--  drivers/xen/Makefile | 2
-rw-r--r--  drivers/xen/balloon.c | 10
-rw-r--r--  drivers/xen/events.c | 114
-rw-r--r--  drivers/xen/grant-table.c | 4
-rw-r--r--  drivers/xen/manage.c | 252
-rw-r--r--  drivers/xen/xenbus/xenbus_comms.c | 23
-rw-r--r--  fs/Kconfig | 2
-rw-r--r--  fs/proc/proc_misc.c | 16
-rw-r--r--  include/asm-generic/pgtable.h | 57
-rw-r--r--  include/asm-generic/vmlinux.lds.h | 14
-rw-r--r--  include/asm-x86/acpi.h | 5
-rw-r--r--  include/asm-x86/amd_iommu.h | 32
-rw-r--r--  include/asm-x86/amd_iommu_types.h | 244
-rw-r--r--  include/asm-x86/apic.h | 14
-rw-r--r--  include/asm-x86/asm.h | 55
-rw-r--r--  include/asm-x86/atomic_64.h | 32
-rw-r--r--  include/asm-x86/bios_ebda.h | 2
-rw-r--r--  include/asm-x86/bitops.h | 68
-rw-r--r--  include/asm-x86/bootparam.h | 2
-rw-r--r--  include/asm-x86/cmpxchg_64.h | 37
-rw-r--r--  include/asm-x86/cpufeature.h | 9
-rw-r--r--  include/asm-x86/current.h | 42
-rw-r--r--  include/asm-x86/current_32.h | 17
-rw-r--r--  include/asm-x86/current_64.h | 27
-rw-r--r--  include/asm-x86/desc.h | 46
-rw-r--r--  include/asm-x86/desc_defs.h | 4
-rw-r--r--  include/asm-x86/dmi.h | 8
-rw-r--r--  include/asm-x86/dwarf2.h | 62
-rw-r--r--  include/asm-x86/dwarf2_32.h | 61
-rw-r--r--  include/asm-x86/dwarf2_64.h | 56
-rw-r--r--  include/asm-x86/e820.h | 107
-rw-r--r--  include/asm-x86/e820_32.h | 50
-rw-r--r--  include/asm-x86/e820_64.h | 56
-rw-r--r--  include/asm-x86/efi.h | 2
-rw-r--r--  include/asm-x86/elf.h | 2
-rw-r--r--  include/asm-x86/fixmap.h | 55
-rw-r--r--  include/asm-x86/fixmap_32.h | 50
-rw-r--r--  include/asm-x86/fixmap_64.h | 58
-rw-r--r--  include/asm-x86/gart.h | 84
-rw-r--r--  include/asm-x86/genapic_32.h | 5
-rw-r--r--  include/asm-x86/genapic_64.h | 8
-rw-r--r--  include/asm-x86/hardirq.h | 6
-rw-r--r--  include/asm-x86/highmem.h | 3
-rw-r--r--  include/asm-x86/hpet.h | 2
-rw-r--r--  include/asm-x86/hw_irq.h | 105
-rw-r--r--  include/asm-x86/hw_irq_32.h | 66
-rw-r--r--  include/asm-x86/hw_irq_64.h | 173
-rw-r--r--  include/asm-x86/i8259.h | 2
-rw-r--r--  include/asm-x86/io.h | 83
-rw-r--r--  include/asm-x86/io_32.h | 61
-rw-r--r--  include/asm-x86/io_64.h | 71
-rw-r--r--  include/asm-x86/io_apic.h | 39
-rw-r--r--  include/asm-x86/iommu.h | 31
-rw-r--r--  include/asm-x86/ipi.h | 1
-rw-r--r--  include/asm-x86/irq.h | 51
-rw-r--r--  include/asm-x86/irq_32.h | 51
-rw-r--r--  include/asm-x86/irq_64.h | 51
-rw-r--r--  include/asm-x86/irq_vectors.h | 169
-rw-r--r--  include/asm-x86/irqflags.h | 41
-rw-r--r--  include/asm-x86/mach-bigsmp/mach_apic.h | 2
-rw-r--r--  include/asm-x86/mach-bigsmp/mach_mpspec.h | 8
-rw-r--r--  include/asm-x86/mach-default/irq_vectors.h | 96
-rw-r--r--  include/asm-x86/mach-default/irq_vectors_limits.h | 16
-rw-r--r--  include/asm-x86/mach-default/mach_apic.h | 4
-rw-r--r--  include/asm-x86/mach-default/setup_arch.h | 4
-rw-r--r--  include/asm-x86/mach-default/smpboot_hooks.h | 10
-rw-r--r--  include/asm-x86/mach-es7000/mach_mpspec.h | 8
-rw-r--r--  include/asm-x86/mach-generic/mach_mpparse.h | 7
-rw-r--r--  include/asm-x86/mach-numaq/mach_apic.h | 39
-rw-r--r--  include/asm-x86/mach-numaq/mach_mpparse.h | 11
-rw-r--r--  include/asm-x86/mach-numaq/mach_mpspec.h | 8
-rw-r--r--  include/asm-x86/mach-summit/mach_mpspec.h | 9
-rw-r--r--  include/asm-x86/mach-visws/irq_vectors.h | 62
-rw-r--r--  include/asm-x86/mach-visws/mach_apic.h | 104
-rw-r--r--  include/asm-x86/mach-visws/mach_apicdef.h | 13
-rw-r--r--  include/asm-x86/mach-visws/setup_arch.h | 9
-rw-r--r--  include/asm-x86/mach-visws/smpboot_hooks.h | 29
-rw-r--r--  include/asm-x86/mach-voyager/irq_vectors.h | 79
-rw-r--r--  include/asm-x86/mmconfig.h | 12
-rw-r--r--  include/asm-x86/mmu_context.h | 32
-rw-r--r--  include/asm-x86/mmu_context_32.h | 28
-rw-r--r--  include/asm-x86/mmu_context_64.h | 18
-rw-r--r--  include/asm-x86/mmzone_32.h | 26
-rw-r--r--  include/asm-x86/mpspec.h | 36
-rw-r--r--  include/asm-x86/mpspec_def.h | 9
-rw-r--r--  include/asm-x86/msr-index.h | 4
-rw-r--r--  include/asm-x86/msr.h | 5
-rw-r--r--  include/asm-x86/nmi.h | 47
-rw-r--r--  include/asm-x86/numa_32.h | 8
-rw-r--r--  include/asm-x86/numa_64.h | 20
-rw-r--r--  include/asm-x86/numaq.h | 6
-rw-r--r--  include/asm-x86/page.h | 12
-rw-r--r--  include/asm-x86/page_32.h | 15
-rw-r--r--  include/asm-x86/page_64.h | 18
-rw-r--r--  include/asm-x86/paravirt.h | 197
-rw-r--r--  include/asm-x86/pat.h | 8
-rw-r--r--  include/asm-x86/pci.h | 2
-rw-r--r--  include/asm-x86/pci_32.h | 14
-rw-r--r--  include/asm-x86/pda.h | 5
-rw-r--r--  include/asm-x86/percpu.h | 46
-rw-r--r--  include/asm-x86/pgalloc.h | 4
-rw-r--r--  include/asm-x86/pgtable.h | 141
-rw-r--r--  include/asm-x86/pgtable_32.h | 20
-rw-r--r--  include/asm-x86/pgtable_64.h | 8
-rw-r--r--  include/asm-x86/processor-flags.h | 6
-rw-r--r--  include/asm-x86/processor.h | 9
-rw-r--r--  include/asm-x86/proto.h | 2
-rw-r--r--  include/asm-x86/ptrace.h | 8
-rw-r--r--  include/asm-x86/reboot.h | 2
-rw-r--r--  include/asm-x86/required-features.h | 8
-rw-r--r--  include/asm-x86/resume-trace.h | 2
-rw-r--r--  include/asm-x86/seccomp_32.h | 1
-rw-r--r--  include/asm-x86/seccomp_64.h | 1
-rw-r--r--  include/asm-x86/segment.h | 23
-rw-r--r--  include/asm-x86/setup.h | 37
-rw-r--r--  include/asm-x86/smp.h | 27
-rw-r--r--  include/asm-x86/srat.h | 12
-rw-r--r--  include/asm-x86/string_32.h | 323
-rw-r--r--  include/asm-x86/suspend_32.h | 5
-rw-r--r--  include/asm-x86/system.h | 10
-rw-r--r--  include/asm-x86/thread_info.h | 248
-rw-r--r--  include/asm-x86/thread_info_32.h | 205
-rw-r--r--  include/asm-x86/thread_info_64.h | 195
-rw-r--r--  include/asm-x86/time.h | 2
-rw-r--r--  include/asm-x86/timer.h | 4
-rw-r--r--  include/asm-x86/topology.h | 157
-rw-r--r--  include/asm-x86/tsc.h | 2
-rw-r--r--  include/asm-x86/uaccess.h | 448
-rw-r--r--  include/asm-x86/uaccess_32.h | 422
-rw-r--r--  include/asm-x86/uaccess_64.h | 263
-rw-r--r--  include/asm-x86/unistd_64.h | 2
-rw-r--r--  include/asm-x86/uv/uv_bau.h | 337
-rw-r--r--  include/asm-x86/uv/uv_hub.h | 190
-rw-r--r--  include/asm-x86/uv/uv_mmrs.h | 954
-rw-r--r--  include/asm-x86/visws/cobalt.h (renamed from include/asm-x86/mach-visws/cobalt.h) | 0
-rw-r--r--  include/asm-x86/visws/lithium.h (renamed from include/asm-x86/mach-visws/lithium.h) | 0
-rw-r--r--  include/asm-x86/visws/piix4.h (renamed from include/asm-x86/mach-visws/piix4.h) | 0
-rw-r--r--  include/asm-x86/visws/sgivw.h | 5
-rw-r--r--  include/asm-x86/vm86.h | 11
-rw-r--r--  include/asm-x86/vmi_time.h | 2
-rw-r--r--  include/asm-x86/xen/hypercall.h | 11
-rw-r--r--  include/asm-x86/xen/page.h | 25
-rw-r--r--  include/asm-x86/xor_32.h | 5
-rw-r--r--  include/asm-x86/xor_64.h | 5
-rw-r--r--  include/linux/acpi.h | 6
-rw-r--r--  include/linux/bootmem.h | 2
-rw-r--r--  include/linux/console.h | 2
-rw-r--r--  include/linux/delay.h | 1
-rw-r--r--  include/linux/efi.h | 4
-rw-r--r--  include/linux/firmware-map.h | 74
-rw-r--r--  include/linux/kernel_stat.h | 2
-rw-r--r--  include/linux/linkage.h | 4
-rw-r--r--  include/linux/mm.h | 7
-rw-r--r--  include/linux/page-flags.h | 1
-rw-r--r--  include/linux/pageblock-flags.h | 8
-rw-r--r--  include/linux/resume-trace.h | 2
-rw-r--r--  include/xen/events.h | 4
-rw-r--r--  include/xen/grant_table.h | 3
-rw-r--r--  include/xen/hvc-console.h | 9
-rw-r--r--  include/xen/interface/elfnote.h | 20
-rw-r--r--  include/xen/interface/features.h | 3
-rw-r--r--  include/xen/interface/io/fbif.h | 29
-rw-r--r--  include/xen/interface/io/kbdif.h | 2
-rw-r--r--  include/xen/interface/memory.h | 12
-rw-r--r--  include/xen/interface/xen.h | 9
-rw-r--r--  include/xen/xen-ops.h | 6
-rw-r--r--  init/calibrate.c | 60
-rw-r--r--  kernel/printk.c | 5
-rw-r--r--  kernel/sched.c | 18
-rw-r--r--  kernel/time/tick-broadcast.c | 6
-rw-r--r--  mm/mprotect.c | 10
-rw-r--r--  mm/page_alloc.c | 94
-rw-r--r--  sound/oss/vwsnd.c | 2
406 files changed, 24412 insertions, 16637 deletions
diff --git a/Documentation/ABI/testing/sysfs-firmware-memmap b/Documentation/ABI/testing/sysfs-firmware-memmap
new file mode 100644
index 000000000000..0d99ee6ae02e
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-firmware-memmap
@@ -0,0 +1,71 @@
+What:		/sys/firmware/memmap/
+Date:		June 2008
+Contact:	Bernhard Walle <bwalle@suse.de>
+Description:
+		On all platforms, the firmware provides a memory map which the
+		kernel reads. The resources from that memory map are registered
+		in the kernel resource tree and exposed to userspace via
+		/proc/iomem (together with other resources).
+
+		However, on most architectures that firmware-provided memory
+		map is modified afterwards by the kernel itself, either because
+		the kernel merges that memory map with other information or
+		just because the user overwrites that memory map via command
+		line.
+
+		kexec needs the raw firmware-provided memory map to setup the
+		parameter segment of the kernel that should be booted with
+		kexec. Also, the raw memory map is useful for debugging. For
+		that reason, /sys/firmware/memmap is an interface that provides
+		the raw memory map to userspace.
+
+		The structure is as follows: Under /sys/firmware/memmap there
+		are subdirectories with the number of the entry as their name:
+
+			/sys/firmware/memmap/0
+			/sys/firmware/memmap/1
+			/sys/firmware/memmap/2
+			/sys/firmware/memmap/3
+			...
+
+		The maximum depends on the number of memory map entries provided
+		by the firmware. The order is just the order that the firmware
+		provides.
+
+		Each directory contains three files:
+
+		start	: The start address (as hexadecimal number with the
+			  '0x' prefix).
+		end	: The end address, inclusive (regardless whether the
+			  firmware provides inclusive or exclusive ranges).
+		type	: Type of the entry as string. See below for a list of
+			  valid types.
+
+		So, for example:
+
+			/sys/firmware/memmap/0/start
+			/sys/firmware/memmap/0/end
+			/sys/firmware/memmap/0/type
+			/sys/firmware/memmap/1/start
+			...
+
+		Currently following types exist:
+
+		  - System RAM
+		  - ACPI Tables
+		  - ACPI Non-volatile Storage
+		  - reserved
+
+		Following shell snippet can be used to display that memory
+		map in a human-readable format:
+
+		-------------------- 8< ----------------------------------------
+		  #!/bin/bash
+		  cd /sys/firmware/memmap
+		  for dir in * ; do
+		      start=$(cat $dir/start)
+		      end=$(cat $dir/end)
+		      type=$(cat $dir/type)
+		      printf "%016x-%016x (%s)\n" $start $[ $end +1] "$type"
+		  done
+		-------------------- >8 ----------------------------------------
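
[ Annotation, not part of the patch: a C equivalent of the shell snippet
  above, relying only on what the ABI document states -- numbered entry
  directories, each containing 'start', 'end' (inclusive) and 'type'. ]

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static int read_field(int i, const char *name, char *buf, size_t len)
{
	char path[64];
	FILE *f;

	snprintf(path, sizeof(path), "/sys/firmware/memmap/%d/%s", i, name);
	f = fopen(path, "r");
	if (!f)
		return -1;
	if (!fgets(buf, len, f))
		buf[0] = '\0';
	fclose(f);
	buf[strcspn(buf, "\n")] = '\0';
	return 0;
}

int main(void)
{
	char start[32], end[32], type[64];
	int i;

	/* entries are directories named 0, 1, 2, ...; stop at the first gap */
	for (i = 0; read_field(i, "start", start, sizeof(start)) == 0; i++) {
		read_field(i, "end", end, sizeof(end));
		read_field(i, "type", type, sizeof(type));
		/* 'end' is inclusive, so add 1 to print an exclusive bound,
		   matching the shell snippet's $[ $end +1 ] */
		printf("%016llx-%016llx (%s)\n", strtoull(start, NULL, 0),
		       strtoull(end, NULL, 0) + 1, type);
	}
	return 0;
}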
diff --git a/Documentation/kdump/kdump.txt b/Documentation/kdump/kdump.txt
index b8e52c0355d3..9691c7f5166c 100644
--- a/Documentation/kdump/kdump.txt
+++ b/Documentation/kdump/kdump.txt
@@ -109,7 +109,7 @@ There are two possible methods of using Kdump.
 2) Or use the system kernel binary itself as dump-capture kernel and there is
    no need to build a separate dump-capture kernel. This is possible
    only with the architecutres which support a relocatable kernel. As
-   of today i386 and ia64 architectures support relocatable kernel.
+   of today, i386, x86_64 and ia64 architectures support relocatable kernel.
 
 Building a relocatable kernel is advantageous from the point of view that
 one does not have to build a second kernel for capturing the dump. But
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index b52f47d588b4..795c487af8e4 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -271,6 +271,17 @@ and is between 256 and 4096 characters. It is defined in the file
 	aic79xx=	[HW,SCSI]
 			See Documentation/scsi/aic79xx.txt.
 
+	amd_iommu=	[HW,X86-84]
+			Pass parameters to the AMD IOMMU driver in the system.
+			Possible values are:
+			isolate - enable device isolation (each device, as far
+				  as possible, will get its own protection
+				  domain)
+	amd_iommu_size= [HW,X86-64]
+			Define the size of the aperture for the AMD IOMMU
+			driver. Possible values are:
+			'32M', '64M' (default), '128M', '256M', '512M', '1G'
+
 	amijoy.map=	[HW,JOY] Amiga joystick support
 			Map of devices attached to JOY0DAT and JOY1DAT
 			Format: <a>,<b>
@@ -599,6 +610,29 @@ and is between 256 and 4096 characters. It is defined in the file
 			See drivers/char/README.epca and
 			Documentation/digiepca.txt.
 
+	disable_mtrr_cleanup [X86]
+	enable_mtrr_cleanup [X86]
+			The kernel tries to adjust MTRR layout from continuous
+			to discrete, to make X server driver able to add WB
+			entry later. This parameter enables/disables that.
+
+	mtrr_chunk_size=nn[KMG] [X86]
+			used for mtrr cleanup. It is largest continous chunk
+			that could hold holes aka. UC entries.
+
+	mtrr_gran_size=nn[KMG] [X86]
+			Used for mtrr cleanup. It is granularity of mtrr block.
+			Default is 1.
+			Large value could prevent small alignment from
+			using up MTRRs.
+
+	mtrr_spare_reg_nr=n [X86]
+			Format: <integer>
+			Range: 0,7 : spare reg number
+			Default : 1
+			Used for mtrr cleanup. It is spare mtrr entries number.
+			Set to 2 or more if your graphical card needs more.
+
 	disable_mtrr_trim [X86, Intel and AMD only]
 			By default the kernel will trim any uncacheable
 			memory out of your available memory pool based on
@@ -2116,6 +2150,9 @@ and is between 256 and 4096 characters. It is defined in the file
 	usbhid.mousepoll=
 			[USBHID] The interval which mice are to be polled at.
 
+	add_efi_memmap	[EFI; x86-32,X86-64] Include EFI memory map in
+			kernel's map of available physical RAM.
+
 	vdso=		[X86-32,SH,x86-64]
 			vdso=2: enable compat VDSO (default with COMPAT_VDSO)
 			vdso=1: enable VDSO (default)
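
[ Annotation, not part of the patch: the nn[KMG] syntax accepted by
  mtrr_chunk_size= and mtrr_gran_size= above is the kernel's usual
  size-suffix convention (the kernel itself parses it with memparse()).
  A standalone sketch of the same rule, for illustration only: ]

#include <stdlib.h>

static unsigned long long parse_size(const char *s)
{
	char *end;
	unsigned long long v = strtoull(s, &end, 0);

	switch (*end) {			/* K, M, G each scale by 2^10 */
	case 'G': case 'g': v <<= 10;	/* fall through */
	case 'M': case 'm': v <<= 10;	/* fall through */
	case 'K': case 'k': v <<= 10;
	}
	return v;
}

/* parse_size("64M") == 64ULL << 20, so mtrr_chunk_size=64M means 64 MiB. */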
diff --git a/Documentation/nmi_watchdog.txt b/Documentation/nmi_watchdog.txt
index 757c729ee42e..90aa4531cb67 100644
--- a/Documentation/nmi_watchdog.txt
+++ b/Documentation/nmi_watchdog.txt
@@ -10,7 +10,7 @@ us to generate 'watchdog NMI interrupts'. (NMI: Non Maskable Interrupt
 which get executed even if the system is otherwise locked up hard).
 This can be used to debug hard kernel lockups. By executing periodic
 NMI interrupts, the kernel can monitor whether any CPU has locked up,
-and print out debugging messages if so. 
+and print out debugging messages if so.
 
 In order to use the NMI watchdog, you need to have APIC support in your
 kernel. For SMP kernels, APIC support gets compiled in automatically. For
@@ -22,8 +22,7 @@ CONFIG_X86_UP_IOAPIC is for uniprocessor with an IO-APIC. [Note: certain
 kernel debugging options, such as Kernel Stack Meter or Kernel Tracer,
 may implicitly disable the NMI watchdog.]
 
-For x86-64, the needed APIC is always compiled in, and the NMI watchdog is
-always enabled with I/O-APIC mode (nmi_watchdog=1).
+For x86-64, the needed APIC is always compiled in.
 
 Using local APIC (nmi_watchdog=2) needs the first performance register, so
 you can't use it for other purposes (such as high precision performance
@@ -63,16 +62,15 @@ when the system is idle), but if your system locks up on anything but the
63"hlt", then you are out of luck -- the event will not happen at all and the 62"hlt", then you are out of luck -- the event will not happen at all and the
64watchdog won't trigger. This is a shortcoming of the local APIC watchdog 63watchdog won't trigger. This is a shortcoming of the local APIC watchdog
65-- unfortunately there is no "clock ticks" event that would work all the 64-- unfortunately there is no "clock ticks" event that would work all the
66time. The I/O APIC watchdog is driven externally and has no such shortcoming. 65time. The I/O APIC watchdog is driven externally and has no such shortcoming.
67But its NMI frequency is much higher, resulting in a more significant hit 66But its NMI frequency is much higher, resulting in a more significant hit
68to the overall system performance. 67to the overall system performance.
69 68
70NOTE: starting with 2.4.2-ac18 the NMI-oopser is disabled by default, 69On x86 nmi_watchdog is disabled by default so you have to enable it with
71you have to enable it with a boot time parameter. Prior to 2.4.2-ac18 70a boot time parameter.
72the NMI-oopser is enabled unconditionally on x86 SMP boxes.
73 71
74On x86-64 the NMI oopser is on by default. On 64bit Intel CPUs 72NOTE: In kernels prior to 2.4.2-ac18 the NMI-oopser is enabled unconditionally
75it uses IO-APIC by default and on AMD it uses local APIC. 73on x86 SMP boxes.
76 74
77[ feel free to send bug reports, suggestions and patches to 75[ feel free to send bug reports, suggestions and patches to
78 Ingo Molnar <mingo@redhat.com> or the Linux SMP mailing 76 Ingo Molnar <mingo@redhat.com> or the Linux SMP mailing
diff --git a/Documentation/i386/IO-APIC.txt b/Documentation/x86/i386/IO-APIC.txt
index 30b4c714fbe1..30b4c714fbe1 100644
--- a/Documentation/i386/IO-APIC.txt
+++ b/Documentation/x86/i386/IO-APIC.txt
diff --git a/Documentation/i386/boot.txt b/Documentation/x86/i386/boot.txt
index 95ad15c3b01f..147bfe511cdd 100644
--- a/Documentation/i386/boot.txt
+++ b/Documentation/x86/i386/boot.txt
@@ -1,17 +1,14 @@
-		THE LINUX/I386 BOOT PROTOCOL
-		----------------------------
+		THE LINUX/x86 BOOT PROTOCOL
+		---------------------------
 
-	H. Peter Anvin <hpa@zytor.com>
-	Last update 2007-05-23
-
-On the i386 platform, the Linux kernel uses a rather complicated boot
+On the x86 platform, the Linux kernel uses a rather complicated boot
 convention. This has evolved partially due to historical aspects, as
 well as the desire in the early days to have the kernel itself be a
 bootable image, the complicated PC memory model and due to changed
 expectations in the PC industry caused by the effective demise of
 real-mode DOS as a mainstream operating system.
 
-Currently, the following versions of the Linux/i386 boot protocol exist.
+Currently, the following versions of the Linux/x86 boot protocol exist.
 
 Old kernels:	zImage/Image support only. Some very early kernels
 		may not even support a command line.
@@ -372,10 +369,17 @@ Protocol: 2.00+
 	- If 0, the protected-mode code is loaded at 0x10000.
 	- If 1, the protected-mode code is loaded at 0x100000.
 
+  Bit 5 (write): QUIET_FLAG
+	- If 0, print early messages.
+	- If 1, suppress early messages.
+		This requests to the kernel (decompressor and early
+		kernel) to not write early messages that require
+		accessing the display hardware directly.
+
   Bit 6 (write): KEEP_SEGMENTS
 	Protocol: 2.07+
-	- if 0, reload the segment registers in the 32bit entry point.
-	- if 1, do not reload the segment registers in the 32bit entry point.
+	- If 0, reload the segment registers in the 32bit entry point.
+	- If 1, do not reload the segment registers in the 32bit entry point.
 		Assume that %cs %ds %ss %es are all set to flat segments with
 		a base of 0 (or the equivalent for their environment).
 
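
[ Annotation, not part of the patch: a hedged boot-loader-side sketch of
  the new QUIET_FLAG. Offset 0x211 is the loadflags byte of the setup
  header, counted from the start of the real-mode boot image; the macro
  names here are illustrative, not taken from this patch. ]

#include <stdint.h>

#define LOADFLAGS	0x211		/* loadflags byte in the setup header */
#define QUIET_FLAG	(1 << 5)	/* bit 5: suppress early messages */
#define KEEP_SEGMENTS	(1 << 6)	/* bit 6: do not reload segment regs */

static void request_quiet_boot(uint8_t *boot_image)
{
	/* ask the decompressor/early kernel not to touch display hardware */
	boot_image[LOADFLAGS] |= QUIET_FLAG;
}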
@@ -504,7 +508,7 @@ Protocol: 2.06+
   maximum size was 255.
 
 Field name:	hardware_subarch
-Type:		write
+Type:		write (optional, defaults to x86/PC)
 Offset/size:	0x23c/4
 Protocol:	2.07+
 
@@ -520,11 +524,13 @@ Protocol: 2.07+
   0x00000002	Xen
 
 Field name:	hardware_subarch_data
-Type:		write
+Type:		write (subarch-dependent)
 Offset/size:	0x240/8
 Protocol:	2.07+
 
   A pointer to data that is specific to hardware subarch
+  This field is currently unused for the default x86/PC environment,
+  do not modify.
 
 Field name:	payload_offset
 Type:		read
@@ -545,6 +551,34 @@ Protocol: 2.08+
 
   The length of the payload.
 
+Field name:	setup_data
+Type:		write (special)
+Offset/size:	0x250/8
+Protocol:	2.09+
+
+  The 64-bit physical pointer to NULL terminated single linked list of
+  struct setup_data. This is used to define a more extensible boot
+  parameters passing mechanism. The definition of struct setup_data is
+  as follow:
+
+  struct setup_data {
+	  u64 next;
+	  u32 type;
+	  u32 len;
+	  u8  data[0];
+  };
+
+  Where, the next is a 64-bit physical pointer to the next node of
+  linked list, the next field of the last node is 0; the type is used
+  to identify the contents of data; the len is the length of data
+  field; the data holds the real payload.
+
+  This list may be modified at a number of points during the bootup
+  process. Therefore, when modifying this list one should always make
+  sure to consider the case where the linked list already contains
+  entries.
+
+
 **** THE IMAGE CHECKSUM
 
 From boot protocol version 2.08 onwards the CRC-32 is calculated over
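
[ Annotation, not part of the patch: a hedged sketch of appending to the
  setup_data list just documented. phys_to_virt() stands in for whatever
  physical-to-usable address translation the boot environment provides. ]

#include <stdint.h>

struct setup_data {
	uint64_t next;	/* physical address of next node, 0 terminates */
	uint32_t type;	/* identifies the contents of data[] */
	uint32_t len;	/* length of data[] in bytes */
	uint8_t  data[];
};

extern void *phys_to_virt(uint64_t phys);	/* environment-specific */

/* 'head' points at the setup_data field (offset 0x250) in the header. */
static void setup_data_append(uint64_t *head, uint64_t node_phys)
{
	/* the list may already contain entries, so walk to the tail */
	while (*head) {
		struct setup_data *node = phys_to_virt(*head);
		head = &node->next;
	}
	((struct setup_data *)phys_to_virt(node_phys))->next = 0;
	*head = node_phys;
}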
@@ -553,6 +587,7 @@ initial remainder of 0xffffffff. The checksum is appended to the
 file; therefore the CRC of the file up to the limit specified in the
 syssize field of the header is always 0.
 
+
 **** THE KERNEL COMMAND LINE
 
 The kernel command line has become an important way for the boot
@@ -584,28 +619,6 @@ command line is entered using the following protocol:
 	covered by setup_move_size, so you may need to adjust this
 	field.
 
-Field name:	setup_data
-Type:		write (obligatory)
-Offset/size:	0x250/8
-Protocol:	2.09+
-
-  The 64-bit physical pointer to NULL terminated single linked list of
-  struct setup_data. This is used to define a more extensible boot
-  parameters passing mechanism. The definition of struct setup_data is
-  as follow:
-
-  struct setup_data {
-	  u64 next;
-	  u32 type;
-	  u32 len;
-	  u8  data[0];
-  };
-
-  Where, the next is a 64-bit physical pointer to the next node of
-  linked list, the next field of the last node is 0; the type is used
-  to identify the contents of data; the len is the length of data
-  field; the data holds the real payload.
-
 
 **** MEMORY LAYOUT OF THE REAL-MODE CODE
 
diff --git a/Documentation/i386/usb-legacy-support.txt b/Documentation/x86/i386/usb-legacy-support.txt
index 1894cdfc69d9..1894cdfc69d9 100644
--- a/Documentation/i386/usb-legacy-support.txt
+++ b/Documentation/x86/i386/usb-legacy-support.txt
diff --git a/Documentation/i386/zero-page.txt b/Documentation/x86/i386/zero-page.txt
index 169ad423a3d1..169ad423a3d1 100644
--- a/Documentation/i386/zero-page.txt
+++ b/Documentation/x86/i386/zero-page.txt
diff --git a/Documentation/x86_64/00-INDEX b/Documentation/x86/x86_64/00-INDEX
index 92fc20ab5f0e..92fc20ab5f0e 100644
--- a/Documentation/x86_64/00-INDEX
+++ b/Documentation/x86/x86_64/00-INDEX
diff --git a/Documentation/x86_64/boot-options.txt b/Documentation/x86/x86_64/boot-options.txt
index b0c7b6c4abda..b0c7b6c4abda 100644
--- a/Documentation/x86_64/boot-options.txt
+++ b/Documentation/x86/x86_64/boot-options.txt
diff --git a/Documentation/x86_64/cpu-hotplug-spec b/Documentation/x86/x86_64/cpu-hotplug-spec
index 3c23e0587db3..3c23e0587db3 100644
--- a/Documentation/x86_64/cpu-hotplug-spec
+++ b/Documentation/x86/x86_64/cpu-hotplug-spec
diff --git a/Documentation/x86_64/fake-numa-for-cpusets b/Documentation/x86/x86_64/fake-numa-for-cpusets
index d1a985c5b00a..d1a985c5b00a 100644
--- a/Documentation/x86_64/fake-numa-for-cpusets
+++ b/Documentation/x86/x86_64/fake-numa-for-cpusets
diff --git a/Documentation/x86_64/kernel-stacks b/Documentation/x86/x86_64/kernel-stacks
index 5ad65d51fb95..5ad65d51fb95 100644
--- a/Documentation/x86_64/kernel-stacks
+++ b/Documentation/x86/x86_64/kernel-stacks
diff --git a/Documentation/x86_64/machinecheck b/Documentation/x86/x86_64/machinecheck
index a05e58e7b159..a05e58e7b159 100644
--- a/Documentation/x86_64/machinecheck
+++ b/Documentation/x86/x86_64/machinecheck
diff --git a/Documentation/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt
index b89b6d2bebfa..efce75097369 100644
--- a/Documentation/x86_64/mm.txt
+++ b/Documentation/x86/x86_64/mm.txt
@@ -11,9 +11,8 @@ ffffc10000000000 - ffffc1ffffffffff (=40 bits) hole
 ffffc20000000000 - ffffe1ffffffffff (=45 bits) vmalloc/ioremap space
 ffffe20000000000 - ffffe2ffffffffff (=40 bits) virtual memory map (1TB)
 ... unused hole ...
-ffffffff80000000 - ffffffff82800000 (=40 MB)   kernel text mapping, from phys 0
-... unused hole ...
-ffffffff88000000 - fffffffffff00000 (=1919 MB) module mapping space
+ffffffff80000000 - ffffffffa0000000 (=512 MB)  kernel text mapping, from phys 0
+ffffffffa0000000 - fffffffffff00000 (=1536 MB) module mapping space
 
 The direct mapping covers all memory in the system up to the highest
 memory address (this means in some cases it can also include PCI memory
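
[ Annotation: checking the arithmetic of the new layout -- 0xffffffffa0000000
  - 0xffffffff80000000 = 0x20000000 bytes, the 512 MB now reserved for the
  kernel text mapping; 0xfffffffffff00000 - 0xffffffffa0000000 = 0x5ff00000
  bytes, i.e. 1535 MB, which the table rounds to 1536 MB of module space. ]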
diff --git a/Documentation/x86_64/uefi.txt b/Documentation/x86/x86_64/uefi.txt
index 7d77120a5184..a5e2b4fdb170 100644
--- a/Documentation/x86_64/uefi.txt
+++ b/Documentation/x86/x86_64/uefi.txt
@@ -36,3 +36,7 @@ Mechanics:
   services.
 	noefi		turn off all EFI runtime services
 	reboot_type=k	turn off EFI reboot runtime service
+- If the EFI memory map has additional entries not in the E820 map,
+  you can include those entries in the kernels memory map of available
+  physical RAM by using the following kernel command line parameter.
+	add_efi_memmap	include EFI memory map of available physical RAM
diff --git a/MAINTAINERS b/MAINTAINERS
index 6476125363e0..c94d038cea33 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -376,6 +376,12 @@ L: linux-geode@lists.infradead.org (moderated for non-subscribers)
 W:	http://www.amd.com/us-en/ConnectivitySolutions/TechnicalResources/0,,50_2334_2452_11363,00.html
 S:	Supported
 
+AMD IOMMU (AMD-VI)
+P:	Joerg Roedel
+M:	joerg.roedel@amd.com
+L:	iommu@lists.linux-foundation.org
+S:	Supported
+
 AMS (Apple Motion Sensor) DRIVER
 P:	Stelian Pop
 M:	stelian@popies.net
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index bf07b6f50fa1..2cfccc987a26 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -121,7 +121,7 @@ config ARCH_HAS_CACHE_LINE_SIZE
 	def_bool y
 
 config HAVE_SETUP_PER_CPU_AREA
-	def_bool X86_64 || (X86_SMP && !X86_VOYAGER)
+	def_bool X86_64_SMP || (X86_SMP && !X86_VOYAGER)
 
 config HAVE_CPUMASK_OF_CPU_MAP
 	def_bool X86_64_SMP
@@ -181,12 +181,12 @@ config X86_64_SMP
 config X86_HT
 	bool
 	depends on SMP
-	depends on (X86_32 && !(X86_VISWS || X86_VOYAGER)) || X86_64
+	depends on (X86_32 && !X86_VOYAGER) || X86_64
 	default y
 
 config X86_BIOS_REBOOT
 	bool
-	depends on !X86_VISWS && !X86_VOYAGER
+	depends on !X86_VOYAGER
 	default y
 
 config X86_TRAMPOLINE
@@ -230,6 +230,26 @@ config SMP
 
 	  If you don't know what to do here, say N.
 
+config X86_FIND_SMP_CONFIG
+	def_bool y
+	depends on X86_MPPARSE || X86_VOYAGER
+
+if ACPI
+config X86_MPPARSE
+	def_bool y
+	bool "Enable MPS table"
+	depends on X86_LOCAL_APIC
+	help
+	  For old smp systems that do not have proper acpi support. Newer systems
+	  (esp with 64bit cpus) with acpi support, MADT and DSDT will override it
+endif
+
+if !ACPI
+config X86_MPPARSE
+	def_bool y
+	depends on X86_LOCAL_APIC
+endif
+
 choice
 	prompt "Subarchitecture Type"
 	default X86_PC
@@ -251,7 +271,7 @@ config X86_ELAN
 
 config X86_VOYAGER
 	bool "Voyager (NCR)"
-	depends on X86_32 && (SMP || BROKEN)
+	depends on X86_32 && (SMP || BROKEN) && !PCI
 	help
 	  Voyager is an MCA-based 32-way capable SMP architecture proprietary
 	  to NCR Corp. Machine classes 345x/35xx/4100/51xx are Voyager-based.
@@ -261,16 +281,27 @@ config X86_VOYAGER
 	  If you do not specifically know you have a Voyager based machine,
 	  say N here, otherwise the kernel you build will not be bootable.
 
+config X86_GENERICARCH
+	bool "Generic architecture"
+	depends on X86_32
+	help
+	  This option compiles in the NUMAQ, Summit, bigsmp, ES7000, default
+	  subarchitectures. It is intended for a generic binary kernel.
+	  if you select them all, kernel will probe it one by one. and will
+	  fallback to default.
+
+if X86_GENERICARCH
+
 config X86_NUMAQ
 	bool "NUMAQ (IBM/Sequent)"
-	depends on SMP && X86_32
+	depends on SMP && X86_32 && PCI && X86_MPPARSE
 	select NUMA
 	help
-	  This option is used for getting Linux to run on a (IBM/Sequent) NUMA
-	  multiquad box. This changes the way that processors are bootstrapped,
-	  and uses Clustered Logical APIC addressing mode instead of Flat Logical.
-	  You will need a new lynxer.elf file to flash your firmware with - send
-	  email to <Martin.Bligh@us.ibm.com>.
+	  This option is used for getting Linux to run on a NUMAQ (IBM/Sequent)
+	  NUMA multiquad box. This changes the way that processors are
+	  bootstrapped, and uses Clustered Logical APIC addressing mode instead
+	  of Flat Logical. You will need a new lynxer.elf file to flash your
+	  firmware with - send email to <Martin.Bligh@us.ibm.com>.
 
 config X86_SUMMIT
 	bool "Summit/EXA (IBM x440)"
@@ -279,46 +310,21 @@ config X86_SUMMIT
 	  This option is needed for IBM systems that use the Summit/EXA chipset.
 	  In particular, it is needed for the x440.
 
-	  If you don't have one of these computers, you should say N here.
-	  If you want to build a NUMA kernel, you must select ACPI.
+config X86_ES7000
+	bool "Support for Unisys ES7000 IA32 series"
+	depends on X86_32 && SMP
+	help
+	  Support for Unisys ES7000 systems. Say 'Y' here if this kernel is
+	  supposed to run on an IA32-based Unisys ES7000 system.
 
 config X86_BIGSMP
-	bool "Support for other sub-arch SMP systems with more than 8 CPUs"
+	bool "Support for big SMP systems with more than 8 CPUs"
 	depends on X86_32 && SMP
 	help
 	  This option is needed for the systems that have more than 8 CPUs
 	  and if the system is not of any sub-arch type above.
 
-	  If you don't have such a system, you should say N here.
-
-config X86_VISWS
-	bool "SGI 320/540 (Visual Workstation)"
-	depends on X86_32
-	help
-	  The SGI Visual Workstation series is an IA32-based workstation
-	  based on SGI systems chips with some legacy PC hardware attached.
-
-	  Say Y here to create a kernel to run on the SGI 320 or 540.
-
-	  A kernel compiled for the Visual Workstation will not run on PCs
-	  and vice versa. See <file:Documentation/sgi-visws.txt> for details.
-
-config X86_GENERICARCH
-	bool "Generic architecture (Summit, bigsmp, ES7000, default)"
-	depends on X86_32
-	help
-	  This option compiles in the Summit, bigsmp, ES7000, default subarchitectures.
-	  It is intended for a generic binary kernel.
-	  If you want a NUMA kernel, select ACPI. We need SRAT for NUMA.
-
-config X86_ES7000
-	bool "Support for Unisys ES7000 IA32 series"
-	depends on X86_32 && SMP
-	help
-	  Support for Unisys ES7000 systems. Say 'Y' here if this kernel is
-	  supposed to run on an IA32-based Unisys ES7000 system.
-	  Only choose this option if you have such a system, otherwise you
-	  should say N here.
+endif
 
 config X86_RDC321X
 	bool "RDC R-321x SoC"
@@ -337,7 +343,7 @@ config X86_RDC321X
 config X86_VSMP
 	bool "Support for ScaleMP vSMP"
 	select PARAVIRT
-	depends on X86_64
+	depends on X86_64 && PCI
 	help
 	  Support for ScaleMP vSMP systems. Say 'Y' here if this kernel is
 	  supposed to run on these EM64T-based machines. Only choose this option
@@ -345,6 +351,18 @@ config X86_VSMP
 
 endchoice
 
+config X86_VISWS
+	bool "SGI 320/540 (Visual Workstation)"
+	depends on X86_32 && PCI && !X86_VOYAGER && X86_MPPARSE && PCI_GODIRECT
+	help
+	  The SGI Visual Workstation series is an IA32-based workstation
+	  based on SGI systems chips with some legacy PC hardware attached.
+
+	  Say Y here to create a kernel to run on the SGI 320 or 540.
+
+	  A kernel compiled for the Visual Workstation will run on general
+	  PCs as well. See <file:Documentation/sgi-visws.txt> for details.
+
 config SCHED_NO_NO_OMIT_FRAME_POINTER
 	def_bool y
 	prompt "Single-depth WCHAN output"
@@ -373,7 +391,7 @@ config VMI
373 bool "VMI Guest support" 391 bool "VMI Guest support"
374 select PARAVIRT 392 select PARAVIRT
375 depends on X86_32 393 depends on X86_32
376 depends on !(X86_VISWS || X86_VOYAGER) 394 depends on !X86_VOYAGER
377 help 395 help
378 VMI provides a paravirtualized interface to the VMware ESX server 396 VMI provides a paravirtualized interface to the VMware ESX server
379 (it could be used by other hypervisors in theory too, but is not 397 (it could be used by other hypervisors in theory too, but is not
@@ -384,7 +402,7 @@ config KVM_CLOCK
384 bool "KVM paravirtualized clock" 402 bool "KVM paravirtualized clock"
385 select PARAVIRT 403 select PARAVIRT
386 select PARAVIRT_CLOCK 404 select PARAVIRT_CLOCK
387 depends on !(X86_VISWS || X86_VOYAGER) 405 depends on !X86_VOYAGER
388 help 406 help
389 Turning on this option will allow you to run a paravirtualized clock 407 Turning on this option will allow you to run a paravirtualized clock
390 when running over the KVM hypervisor. Instead of relying on a PIT 408 when running over the KVM hypervisor. Instead of relying on a PIT
@@ -395,7 +413,7 @@ config KVM_CLOCK
 config KVM_GUEST
 	bool "KVM Guest support"
 	select PARAVIRT
-	depends on !(X86_VISWS || X86_VOYAGER)
+	depends on !X86_VOYAGER
 	help
 	  This option enables various optimizations for running under the KVM
 	  hypervisor.
@@ -404,7 +422,7 @@ source "arch/x86/lguest/Kconfig"
 
 config PARAVIRT
 	bool "Enable paravirtualization code"
-	depends on !(X86_VISWS || X86_VOYAGER)
+	depends on !X86_VOYAGER
 	help
 	  This changes the kernel so it can modify itself when it is run
 	  under a hypervisor, potentially improving performance significantly
@@ -417,51 +435,33 @@ config PARAVIRT_CLOCK
 
 endif
 
-config MEMTEST_BOOTPARAM
-	bool "Memtest boot parameter"
+config PARAVIRT_DEBUG
+	bool "paravirt-ops debugging"
+	depends on PARAVIRT && DEBUG_KERNEL
+	help
+	  Enable to debug paravirt_ops internals. Specifically, BUG if
+	  a paravirt_op is missing when it is called.
+
+config MEMTEST
+	bool "Memtest"
 	depends on X86_64
 	default y
 	help
 	  This option adds a kernel parameter 'memtest', which allows memtest
-	  to be disabled at boot. If this option is selected, memtest
-	  functionality can be disabled with memtest=0 on the kernel
-	  command line. The purpose of this option is to allow a single
-	  kernel image to be distributed with memtest built in, but not
-	  necessarily enabled.
-
+	  to be set.
+		memtest=0, mean disabled; -- default
+		memtest=1, mean do 1 test pattern;
+		...
+		memtest=4, mean do 4 test patterns.
 	  If you are unsure how to answer this question, answer Y.
 
-config MEMTEST_BOOTPARAM_VALUE
-	int "Memtest boot parameter default value (0-4)"
-	depends on MEMTEST_BOOTPARAM
-	range 0 4
-	default 0
-	help
-	  This option sets the default value for the kernel parameter
-	  'memtest', which allows memtest to be disabled at boot. If this
-	  option is set to 0 (zero), the memtest kernel parameter will
-	  default to 0, disabling memtest at bootup. If this option is
-	  set to 4, the memtest kernel parameter will default to 4,
-	  enabling memtest at bootup, and use that as pattern number.
-
-	  If you are unsure how to answer this question, answer 0.
-
-config ACPI_SRAT
-	def_bool y
-	depends on X86_32 && ACPI && NUMA && (X86_SUMMIT || X86_GENERICARCH)
-	select ACPI_NUMA
-
-config HAVE_ARCH_PARSE_SRAT
-	def_bool y
-	depends on ACPI_SRAT
-
 config X86_SUMMIT_NUMA
 	def_bool y
-	depends on X86_32 && NUMA && (X86_SUMMIT || X86_GENERICARCH)
+	depends on X86_32 && NUMA && X86_GENERICARCH
 
 config X86_CYCLONE_TIMER
 	def_bool y
-	depends on X86_32 && X86_SUMMIT || X86_GENERICARCH
+	depends on X86_GENERICARCH
 
 config ES7000_CLUSTERED_APIC
 	def_bool y
@@ -549,6 +549,21 @@ config CALGARY_IOMMU_ENABLED_BY_DEFAULT
 	  Calgary anyway, pass 'iommu=calgary' on the kernel command line.
 	  If unsure, say Y.
 
+config AMD_IOMMU
+	bool "AMD IOMMU support"
+	select SWIOTLB
+	depends on X86_64 && PCI && ACPI
+	help
+	  With this option you can enable support for AMD IOMMU hardware in
+	  your system. An IOMMU is a hardware component which provides
+	  remapping of DMA memory accesses from devices. With an AMD IOMMU you
+	  can isolate the the DMA memory of different devices and protect the
+	  system from misbehaving device drivers or hardware.
+
+	  You can find out if your system has an AMD IOMMU if you look into
+	  your BIOS for an option to enable it or if you have an IVRS ACPI
+	  table.
+
 # need this always selected by IOMMU for the VIA workaround
 config SWIOTLB
 	bool
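
[ Annotation, not part of the patch: the IVRS table mentioned in the help
  text can be checked for from userspace, assuming the kernel exposes raw
  ACPI tables under /sys/firmware/acpi/tables/ as current kernels do: ]

#include <stdio.h>
#include <unistd.h>

int main(void)
{
	if (access("/sys/firmware/acpi/tables/IVRS", F_OK) == 0)
		puts("IVRS present: firmware advertises an AMD IOMMU");
	else
		puts("no IVRS table: AMD_IOMMU will find no hardware");
	return 0;
}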
@@ -560,21 +575,36 @@ config SWIOTLB
 	  3 GB of memory. If unsure, say Y.
 
 config IOMMU_HELPER
-	def_bool (CALGARY_IOMMU || GART_IOMMU || SWIOTLB)
+	def_bool (CALGARY_IOMMU || GART_IOMMU || SWIOTLB || AMD_IOMMU)
+
+config MAXSMP
+	bool "Configure Maximum number of SMP Processors and NUMA Nodes"
+	depends on X86_64 && SMP
+	default n
+	help
+	  Configure maximum number of CPUS and NUMA Nodes for this architecture.
+	  If unsure, say N.
 
+if MAXSMP
 config NR_CPUS
-	int "Maximum number of CPUs (2-255)"
-	range 2 255
+	int
+	default "4096"
+endif
+
+if !MAXSMP
+config NR_CPUS
+	int "Maximum number of CPUs (2-4096)"
+	range 2 4096
 	depends on SMP
 	default "32" if X86_NUMAQ || X86_SUMMIT || X86_BIGSMP || X86_ES7000
 	default "8"
 	help
 	  This allows you to specify the maximum number of CPUs which this
-	  kernel will support. The maximum supported value is 255 and the
+	  kernel will support. The maximum supported value is 4096 and the
 	  minimum value which makes sense is 2.
 
 	  This is purely to save memory - each supported CPU adds
 	  approximately eight kilobytes to the kernel image.
+endif
 
 config SCHED_SMT
 	bool "SMT (Hyperthreading) scheduler support"
@@ -598,7 +628,7 @@ source "kernel/Kconfig.preempt"
 
 config X86_UP_APIC
 	bool "Local APIC support on uniprocessors"
-	depends on X86_32 && !SMP && !(X86_VISWS || X86_VOYAGER || X86_GENERICARCH)
+	depends on X86_32 && !SMP && !(X86_VOYAGER || X86_GENERICARCH)
 	help
 	  A local APIC (Advanced Programmable Interrupt Controller) is an
 	  integrated interrupt controller in the CPU. If you have a single-CPU
@@ -623,11 +653,11 @@ config X86_UP_IOAPIC
 
 config X86_LOCAL_APIC
 	def_bool y
-	depends on X86_64 || (X86_32 && (X86_UP_APIC || ((X86_VISWS || SMP) && !X86_VOYAGER) || X86_GENERICARCH))
+	depends on X86_64 || (X86_32 && (X86_UP_APIC || (SMP && !X86_VOYAGER) || X86_GENERICARCH))
 
 config X86_IO_APIC
 	def_bool y
-	depends on X86_64 || (X86_32 && (X86_UP_IOAPIC || (SMP && !(X86_VISWS || X86_VOYAGER)) || X86_GENERICARCH))
+	depends on X86_64 || (X86_32 && (X86_UP_IOAPIC || (SMP && !X86_VOYAGER) || X86_GENERICARCH))
 
 config X86_VISWS_APIC
 	def_bool y
@@ -681,7 +711,7 @@ config X86_MCE_NONFATAL
 
 config X86_MCE_P4THERMAL
 	bool "check for P4 thermal throttling interrupt."
-	depends on X86_32 && X86_MCE && (X86_UP_APIC || SMP) && !X86_VISWS
+	depends on X86_32 && X86_MCE && (X86_UP_APIC || SMP)
 	help
 	  Enabling this feature will cause a message to be printed when the P4
 	  enters thermal throttling.
@@ -911,9 +941,9 @@ config X86_PAE
 config NUMA
 	bool "Numa Memory Allocation and Scheduler Support (EXPERIMENTAL)"
 	depends on SMP
-	depends on X86_64 || (X86_32 && HIGHMEM64G && (X86_NUMAQ || (X86_SUMMIT || X86_GENERICARCH) && ACPI) && EXPERIMENTAL)
+	depends on X86_64 || (X86_32 && HIGHMEM64G && (X86_NUMAQ || X86_BIGSMP || X86_SUMMIT && ACPI) && EXPERIMENTAL)
 	default n if X86_PC
-	default y if (X86_NUMAQ || X86_SUMMIT)
+	default y if (X86_NUMAQ || X86_SUMMIT || X86_BIGSMP)
 	help
 	  Enable NUMA (Non Uniform Memory Access) support.
 	  The kernel will try to allocate memory used by a CPU on the
@@ -965,13 +995,25 @@ config NUMA_EMU
965 into virtual nodes when booted with "numa=fake=N", where N is the 995 into virtual nodes when booted with "numa=fake=N", where N is the
966 number of nodes. This is only useful for debugging. 996 number of nodes. This is only useful for debugging.
967 997
998if MAXSMP
999
1000config NODES_SHIFT
1001 int
1002 default "9"
1003endif
1004
1005if !MAXSMP
968config NODES_SHIFT 1006config NODES_SHIFT
 969	int "Max num nodes shift(1-9)"			1007	int "Maximum NUMA Nodes (as a power of 2)"
970 range 1 9 if X86_64 1008 range 1 9 if X86_64
971 default "6" if X86_64 1009 default "6" if X86_64
972 default "4" if X86_NUMAQ 1010 default "4" if X86_NUMAQ
973 default "3" 1011 default "3"
974 depends on NEED_MULTIPLE_NODES 1012 depends on NEED_MULTIPLE_NODES
1013 help
 1014	  Specify the maximum number of NUMA Nodes available on the target
 1015	  system. Increases the memory reserved to accommodate various tables.
1016endif
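The prompt's "as a power of 2" wording reflects how the value is consumed; a one-line check (assuming MAX_NUMNODES is derived from this symbol as in linux/numa.h):

	/* MAX_NUMNODES = 1 << NODES_SHIFT, so the MAXSMP default of 9
	 * allows up to 1 << 9 = 512 NUMA nodes. */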
975 1017
976config HAVE_ARCH_BOOTMEM_NODE 1018config HAVE_ARCH_BOOTMEM_NODE
977 def_bool y 1019 def_bool y
@@ -1090,6 +1132,40 @@ config MTRR
1090 1132
1091 See <file:Documentation/mtrr.txt> for more information. 1133 See <file:Documentation/mtrr.txt> for more information.
1092 1134
1135config MTRR_SANITIZER
1136 def_bool y
1137 prompt "MTRR cleanup support"
1138 depends on MTRR
1139 help
 1140	  Convert the MTRR layout from continuous to discrete, so that X
 1141	  drivers can add WB entries.
1142
1143 Say N here if you see bootup problems (boot crash, boot hang,
1144 spontaneous reboots).
1145
 1146	  Can be disabled with the disable_mtrr_cleanup boot parameter. The
 1147	  mtrr_chunk_size parameter can also be used to set the largest MTRR
 1148	  entry size for a continuous block that holds holes (i.e. UC entries).
1149
1150 If unsure, say Y.
1151
1152config MTRR_SANITIZER_ENABLE_DEFAULT
1153 int "MTRR cleanup enable value (0-1)"
1154 range 0 1
1155 default "0"
1156 depends on MTRR_SANITIZER
1157 help
 1158	  Default value for the MTRR cleanup enable flag.
1159
1160config MTRR_SANITIZER_SPARE_REG_NR_DEFAULT
1161 int "MTRR cleanup spare reg num (0-7)"
1162 range 0 7
1163 default "1"
1164 depends on MTRR_SANITIZER
1165 help
 1166	  Default number of MTRR entries kept spare for the cleanup; it can
 1167	  be changed via the mtrr_spare_reg_nr= boot parameter.
1168
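The help texts above name three boot parameters. A hedged sketch of how such knobs are typically wired up with early_param(); the handler bodies and the default chunk size are illustrative, not the patch's exact code:

	static int enable_mtrr_cleanup __initdata =
		CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT;

	static int __init disable_mtrr_cleanup_setup(char *str)
	{
		enable_mtrr_cleanup = 0;	/* "disable_mtrr_cleanup" seen on cmdline */
		return 0;
	}
	early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup);

	static u64 mtrr_chunk_size __initdata;

	static int __init parse_mtrr_chunk_size_opt(char *p)
	{
		if (!p)
			return -EINVAL;
		/* accepts K/M/G suffixes, e.g. mtrr_chunk_size=256M */
		mtrr_chunk_size = memparse(p, &p);
		return 0;
	}
	early_param("mtrr_chunk_size", parse_mtrr_chunk_size_opt);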
1093config X86_PAT 1169config X86_PAT
1094 bool 1170 bool
1095 prompt "x86 PAT support" 1171 prompt "x86 PAT support"
@@ -1190,7 +1266,6 @@ config KEXEC
1190 1266
1191config CRASH_DUMP 1267config CRASH_DUMP
1192 bool "kernel crash dumps (EXPERIMENTAL)" 1268 bool "kernel crash dumps (EXPERIMENTAL)"
1193 depends on EXPERIMENTAL
1194 depends on X86_64 || (X86_32 && HIGHMEM) 1269 depends on X86_64 || (X86_32 && HIGHMEM)
1195 help 1270 help
1196 Generate crash dump after being started by kexec. 1271 Generate crash dump after being started by kexec.
@@ -1339,7 +1414,7 @@ config X86_APM_BOOT
1339 1414
1340menuconfig APM 1415menuconfig APM
1341 tristate "APM (Advanced Power Management) BIOS support" 1416 tristate "APM (Advanced Power Management) BIOS support"
1342 depends on X86_32 && PM_SLEEP && !X86_VISWS 1417 depends on X86_32 && PM_SLEEP
1343 ---help--- 1418 ---help---
1344 APM is a BIOS specification for saving power using several different 1419 APM is a BIOS specification for saving power using several different
1345 techniques. This is mostly useful for battery powered laptops with 1420 techniques. This is mostly useful for battery powered laptops with
@@ -1475,8 +1550,7 @@ endmenu
1475menu "Bus options (PCI etc.)" 1550menu "Bus options (PCI etc.)"
1476 1551
1477config PCI 1552config PCI
1478 bool "PCI support" if !X86_VISWS && !X86_VSMP 1553 bool "PCI support"
1479 depends on !X86_VOYAGER
1480 default y 1554 default y
1481 select ARCH_SUPPORTS_MSI if (X86_LOCAL_APIC && X86_IO_APIC) 1555 select ARCH_SUPPORTS_MSI if (X86_LOCAL_APIC && X86_IO_APIC)
1482 help 1556 help
@@ -1487,7 +1561,7 @@ config PCI
1487 1561
1488choice 1562choice
1489 prompt "PCI access mode" 1563 prompt "PCI access mode"
1490 depends on X86_32 && PCI && !X86_VISWS 1564 depends on X86_32 && PCI
1491 default PCI_GOANY 1565 default PCI_GOANY
1492 ---help--- 1566 ---help---
1493 On PCI systems, the BIOS can be used to detect the PCI devices and 1567 On PCI systems, the BIOS can be used to detect the PCI devices and
@@ -1524,12 +1598,12 @@ endchoice
1524 1598
1525config PCI_BIOS 1599config PCI_BIOS
1526 def_bool y 1600 def_bool y
1527 depends on X86_32 && !X86_VISWS && PCI && (PCI_GOBIOS || PCI_GOANY) 1601 depends on X86_32 && PCI && (PCI_GOBIOS || PCI_GOANY)
1528 1602
1529# x86-64 doesn't support PCI BIOS access from long mode so always go direct. 1603# x86-64 doesn't support PCI BIOS access from long mode so always go direct.
1530config PCI_DIRECT 1604config PCI_DIRECT
1531 def_bool y 1605 def_bool y
1532 depends on PCI && (X86_64 || (PCI_GODIRECT || PCI_GOANY || PCI_GOOLPC) || X86_VISWS) 1606 depends on PCI && (X86_64 || (PCI_GODIRECT || PCI_GOANY || PCI_GOOLPC))
1533 1607
1534config PCI_MMCONFIG 1608config PCI_MMCONFIG
1535 def_bool y 1609 def_bool y
@@ -1589,7 +1663,7 @@ if X86_32
1589 1663
1590config ISA 1664config ISA
1591 bool "ISA support" 1665 bool "ISA support"
1592 depends on !(X86_VOYAGER || X86_VISWS) 1666 depends on !X86_VOYAGER
1593 help 1667 help
1594 Find out whether you have ISA slots on your motherboard. ISA is the 1668 Find out whether you have ISA slots on your motherboard. ISA is the
1595 name of a bus system, i.e. the way the CPU talks to the other stuff 1669 name of a bus system, i.e. the way the CPU talks to the other stuff
@@ -1616,7 +1690,7 @@ config EISA
1616source "drivers/eisa/Kconfig" 1690source "drivers/eisa/Kconfig"
1617 1691
1618config MCA 1692config MCA
1619 bool "MCA support" if !(X86_VISWS || X86_VOYAGER) 1693 bool "MCA support" if !X86_VOYAGER
1620 default y if X86_VOYAGER 1694 default y if X86_VOYAGER
1621 help 1695 help
1622 MicroChannel Architecture is found in some IBM PS/2 machines and 1696 MicroChannel Architecture is found in some IBM PS/2 machines and
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 2ad6301849a1..abff1b84ed5b 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -344,7 +344,7 @@ config X86_F00F_BUG
344 344
345config X86_WP_WORKS_OK 345config X86_WP_WORKS_OK
346 def_bool y 346 def_bool y
347 depends on X86_32 && !M386 347 depends on !M386
348 348
349config X86_INVLPG 349config X86_INVLPG
350 def_bool y 350 def_bool y
@@ -399,6 +399,10 @@ config X86_TSC
399 def_bool y 399 def_bool y
400 depends on ((MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2) && !X86_NUMAQ) || X86_64 400 depends on ((MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2) && !X86_NUMAQ) || X86_64
401 401
402config X86_CMPXCHG64
403 def_bool y
404 depends on X86_PAE || X86_64
405
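The new X86_CMPXCHG64 symbol advertises an 8-byte compare-and-exchange (CMPXCHG8B is present on every PAE-capable part, hence the dependency). A minimal sketch of the kind of code it gates, assuming the kernel's cmpxchg64() helper:

	static u64 seq;

	static void bump_seq(void)
	{
		u64 old, new;

		/* lock-free 64-bit increment, usable on 32-bit kernels */
		do {
			old = seq;
			new = old + 1;
		} while (cmpxchg64(&seq, old, new) != old);
	}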
402# this should be set for all -march=.. options where the compiler 406# this should be set for all -march=.. options where the compiler
403# generates cmov. 407# generates cmov.
404config X86_CMOV 408config X86_CMOV
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 18363374d51a..acc0271920f2 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -20,6 +20,14 @@ config NONPROMISC_DEVMEM
20 20
21 If in doubt, say Y. 21 If in doubt, say Y.
22 22
23config X86_VERBOSE_BOOTUP
24 bool "Enable verbose x86 bootup info messages"
25 default y
26 help
 27	  Enables the informational output from the decompression stage
 28	  of the boot (e.g. of a bzImage). If you disable this you will
 29	  still see errors. Disable this if you want a silent bootup.
30
23config EARLY_PRINTK 31config EARLY_PRINTK
24 bool "Early printk" if EMBEDDED 32 bool "Early printk" if EMBEDDED
25 default y 33 default y
@@ -60,7 +68,7 @@ config DEBUG_PAGEALLOC
60config DEBUG_PER_CPU_MAPS 68config DEBUG_PER_CPU_MAPS
61 bool "Debug access to per_cpu maps" 69 bool "Debug access to per_cpu maps"
62 depends on DEBUG_KERNEL 70 depends on DEBUG_KERNEL
63 depends on X86_64_SMP 71 depends on X86_SMP
64 default n 72 default n
65 help 73 help
66 Say Y to verify that the per_cpu map being accessed has 74 Say Y to verify that the per_cpu map being accessed has
@@ -129,15 +137,6 @@ config 4KSTACKS
129 on the VM subsystem for higher order allocations. This option 137 on the VM subsystem for higher order allocations. This option
130 will also use IRQ stacks to compensate for the reduced stackspace. 138 will also use IRQ stacks to compensate for the reduced stackspace.
131 139
132config X86_FIND_SMP_CONFIG
133 def_bool y
134 depends on X86_LOCAL_APIC || X86_VOYAGER
135 depends on X86_32
136
137config X86_MPPARSE
138 def_bool y
139 depends on (X86_32 && (X86_LOCAL_APIC && !X86_VISWS)) || X86_64
140
141config DOUBLEFAULT 140config DOUBLEFAULT
142 default y 141 default y
143 bool "Enable doublefault exception handler" if EMBEDDED 142 bool "Enable doublefault exception handler" if EMBEDDED
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 3cff3c894cf3..919ce21ea654 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -113,33 +113,11 @@ mcore-y := arch/x86/mach-default/
113mflags-$(CONFIG_X86_VOYAGER) := -Iinclude/asm-x86/mach-voyager 113mflags-$(CONFIG_X86_VOYAGER) := -Iinclude/asm-x86/mach-voyager
114mcore-$(CONFIG_X86_VOYAGER) := arch/x86/mach-voyager/ 114mcore-$(CONFIG_X86_VOYAGER) := arch/x86/mach-voyager/
115 115
116# VISWS subarch support
117mflags-$(CONFIG_X86_VISWS) := -Iinclude/asm-x86/mach-visws
118mcore-$(CONFIG_X86_VISWS) := arch/x86/mach-visws/
119
120# NUMAQ subarch support
121mflags-$(CONFIG_X86_NUMAQ) := -Iinclude/asm-x86/mach-numaq
122mcore-$(CONFIG_X86_NUMAQ) := arch/x86/mach-default/
123
124# BIGSMP subarch support
125mflags-$(CONFIG_X86_BIGSMP) := -Iinclude/asm-x86/mach-bigsmp
126mcore-$(CONFIG_X86_BIGSMP) := arch/x86/mach-default/
127
128#Summit subarch support
129mflags-$(CONFIG_X86_SUMMIT) := -Iinclude/asm-x86/mach-summit
130mcore-$(CONFIG_X86_SUMMIT) := arch/x86/mach-default/
131
132# generic subarchitecture 116# generic subarchitecture
133mflags-$(CONFIG_X86_GENERICARCH):= -Iinclude/asm-x86/mach-generic 117mflags-$(CONFIG_X86_GENERICARCH):= -Iinclude/asm-x86/mach-generic
134fcore-$(CONFIG_X86_GENERICARCH) += arch/x86/mach-generic/ 118fcore-$(CONFIG_X86_GENERICARCH) += arch/x86/mach-generic/
135mcore-$(CONFIG_X86_GENERICARCH) := arch/x86/mach-default/ 119mcore-$(CONFIG_X86_GENERICARCH) := arch/x86/mach-default/
136 120
137
138# ES7000 subarch support
139mflags-$(CONFIG_X86_ES7000) := -Iinclude/asm-x86/mach-es7000
140fcore-$(CONFIG_X86_ES7000) := arch/x86/mach-es7000/
141mcore-$(CONFIG_X86_ES7000) := arch/x86/mach-default/
142
143# RDC R-321x subarch support 121# RDC R-321x subarch support
144mflags-$(CONFIG_X86_RDC321X) := -Iinclude/asm-x86/mach-rdc321x 122mflags-$(CONFIG_X86_RDC321X) := -Iinclude/asm-x86/mach-rdc321x
145mcore-$(CONFIG_X86_RDC321X) := arch/x86/mach-default/ 123mcore-$(CONFIG_X86_RDC321X) := arch/x86/mach-default/
@@ -160,6 +138,7 @@ KBUILD_AFLAGS += $(mflags-y)
160 138
161head-y := arch/x86/kernel/head_$(BITS).o 139head-y := arch/x86/kernel/head_$(BITS).o
162head-y += arch/x86/kernel/head$(BITS).o 140head-y += arch/x86/kernel/head$(BITS).o
141head-y += arch/x86/kernel/head.o
163head-y += arch/x86/kernel/init_task.o 142head-y += arch/x86/kernel/init_task.o
164 143
165libs-y += arch/x86/lib/ 144libs-y += arch/x86/lib/
@@ -210,12 +189,12 @@ all: bzImage
210 189
211# KBUILD_IMAGE specifies the target image being built		190# KBUILD_IMAGE specifies the target image being built
212 KBUILD_IMAGE := $(boot)/bzImage 191 KBUILD_IMAGE := $(boot)/bzImage
213zImage zlilo zdisk: KBUILD_IMAGE := arch/x86/boot/zImage 192zImage zlilo zdisk: KBUILD_IMAGE := $(boot)/zImage
214 193
215zImage bzImage: vmlinux 194zImage bzImage: vmlinux
216 $(Q)$(MAKE) $(build)=$(boot) $(KBUILD_IMAGE) 195 $(Q)$(MAKE) $(build)=$(boot) $(KBUILD_IMAGE)
217 $(Q)mkdir -p $(objtree)/arch/$(UTS_MACHINE)/boot 196 $(Q)mkdir -p $(objtree)/arch/$(UTS_MACHINE)/boot
218 $(Q)ln -fsn ../../x86/boot/bzImage $(objtree)/arch/$(UTS_MACHINE)/boot/bzImage 197 $(Q)ln -fsn ../../x86/boot/bzImage $(objtree)/arch/$(UTS_MACHINE)/boot/$@
219 198
220compressed: zImage 199compressed: zImage
221 200
diff --git a/arch/x86/boot/a20.c b/arch/x86/boot/a20.c
index e01aafd03bde..4063d630deff 100644
--- a/arch/x86/boot/a20.c
+++ b/arch/x86/boot/a20.c
@@ -1,7 +1,7 @@
1/* -*- linux-c -*- ------------------------------------------------------- * 1/* -*- linux-c -*- ------------------------------------------------------- *
2 * 2 *
3 * Copyright (C) 1991, 1992 Linus Torvalds 3 * Copyright (C) 1991, 1992 Linus Torvalds
4 * Copyright 2007 rPath, Inc. - All Rights Reserved 4 * Copyright 2007-2008 rPath, Inc. - All Rights Reserved
5 * 5 *
6 * This file is part of the Linux kernel, and is made available under 6 * This file is part of the Linux kernel, and is made available under
7 * the terms of the GNU General Public License version 2. 7 * the terms of the GNU General Public License version 2.
@@ -95,6 +95,9 @@ static void enable_a20_kbc(void)
95 95
96 outb(0xdf, 0x60); /* A20 on */ 96 outb(0xdf, 0x60); /* A20 on */
97 empty_8042(); 97 empty_8042();
98
99 outb(0xff, 0x64); /* Null command, but UHCI wants it */
100 empty_8042();
98} 101}
99 102
100static void enable_a20_fast(void) 103static void enable_a20_fast(void)
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index d8819efac81d..1d5dff4123e1 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -30,6 +30,7 @@
30#include <asm/page.h> 30#include <asm/page.h>
31#include <asm/boot.h> 31#include <asm/boot.h>
32#include <asm/msr.h> 32#include <asm/msr.h>
33#include <asm/processor-flags.h>
33#include <asm/asm-offsets.h> 34#include <asm/asm-offsets.h>
34 35
35.section ".text.head" 36.section ".text.head"
@@ -109,7 +110,7 @@ startup_32:
109 110
110 /* Enable PAE mode */ 111 /* Enable PAE mode */
111 xorl %eax, %eax 112 xorl %eax, %eax
112 orl $(1 << 5), %eax 113 orl $(X86_CR4_PAE), %eax
113 movl %eax, %cr4 114 movl %eax, %cr4
114 115
115 /* 116 /*
@@ -170,7 +171,7 @@ startup_32:
170 pushl %eax 171 pushl %eax
171 172
172 /* Enter paged protected Mode, activating Long Mode */ 173 /* Enter paged protected Mode, activating Long Mode */
173 movl $0x80000001, %eax /* Enable Paging and Protected mode */ 174 movl $(X86_CR0_PG | X86_CR0_PE), %eax /* Enable Paging and Protected mode */
174 movl %eax, %cr0 175 movl %eax, %cr0
175 176
176 /* Jump from 32bit compatibility mode into 64bit mode. */ 177 /* Jump from 32bit compatibility mode into 64bit mode. */
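A value check for the symbolic rewrite above, using the constants from <asm/processor-flags.h>:

	/* X86_CR4_PAE == 1 << 5, matching the old "orl $(1 << 5)";
	 * X86_CR0_PG | X86_CR0_PE == 0x80000000 | 0x00000001 == 0x80000001,
	 * matching the old literal, so both changes are behavior-preserving. */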
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 90456cee47c3..bc5553b496f7 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -30,6 +30,7 @@
30#include <asm/io.h> 30#include <asm/io.h>
31#include <asm/page.h> 31#include <asm/page.h>
32#include <asm/boot.h> 32#include <asm/boot.h>
33#include <asm/bootparam.h>
33 34
34/* WARNING!! 35/* WARNING!!
35 * This code is compiled with -fPIC and it is relocated dynamically 36 * This code is compiled with -fPIC and it is relocated dynamically
@@ -187,13 +188,8 @@ static void gzip_release(void **);
187/* 188/*
188 * This is set up by the setup-routine at boot-time 189 * This is set up by the setup-routine at boot-time
189 */ 190 */
190static unsigned char *real_mode; /* Pointer to real-mode data */ 191static struct boot_params *real_mode; /* Pointer to real-mode data */
191 192static int quiet;
192#define RM_EXT_MEM_K (*(unsigned short *)(real_mode + 0x2))
193#ifndef STANDARD_MEMORY_BIOS_CALL
194#define RM_ALT_MEM_K (*(unsigned long *)(real_mode + 0x1e0))
195#endif
196#define RM_SCREEN_INFO (*(struct screen_info *)(real_mode+0))
197 193
198extern unsigned char input_data[]; 194extern unsigned char input_data[];
199extern int input_len; 195extern int input_len;
@@ -206,7 +202,8 @@ static void free(void *where);
206static void *memset(void *s, int c, unsigned n); 202static void *memset(void *s, int c, unsigned n);
207static void *memcpy(void *dest, const void *src, unsigned n); 203static void *memcpy(void *dest, const void *src, unsigned n);
208 204
209static void putstr(const char *); 205static void __putstr(int, const char *);
206#define putstr(__x) __putstr(0, __x)
210 207
211#ifdef CONFIG_X86_64 208#ifdef CONFIG_X86_64
212#define memptr long 209#define memptr long
@@ -221,10 +218,6 @@ static char *vidmem;
221static int vidport; 218static int vidport;
222static int lines, cols; 219static int lines, cols;
223 220
224#ifdef CONFIG_X86_NUMAQ
225void *xquad_portio;
226#endif
227
228#include "../../../../lib/inflate.c" 221#include "../../../../lib/inflate.c"
229 222
230static void *malloc(int size) 223static void *malloc(int size)
@@ -270,18 +263,24 @@ static void scroll(void)
270 vidmem[i] = ' '; 263 vidmem[i] = ' ';
271} 264}
272 265
273static void putstr(const char *s) 266static void __putstr(int error, const char *s)
274{ 267{
275 int x, y, pos; 268 int x, y, pos;
276 char c; 269 char c;
277 270
271#ifndef CONFIG_X86_VERBOSE_BOOTUP
272 if (!error)
273 return;
274#endif
275
278#ifdef CONFIG_X86_32 276#ifdef CONFIG_X86_32
279 if (RM_SCREEN_INFO.orig_video_mode == 0 && lines == 0 && cols == 0) 277 if (real_mode->screen_info.orig_video_mode == 0 &&
278 lines == 0 && cols == 0)
280 return; 279 return;
281#endif 280#endif
282 281
283 x = RM_SCREEN_INFO.orig_x; 282 x = real_mode->screen_info.orig_x;
284 y = RM_SCREEN_INFO.orig_y; 283 y = real_mode->screen_info.orig_y;
285 284
286 while ((c = *s++) != '\0') { 285 while ((c = *s++) != '\0') {
287 if (c == '\n') { 286 if (c == '\n') {
@@ -302,8 +301,8 @@ static void putstr(const char *s)
302 } 301 }
303 } 302 }
304 303
305 RM_SCREEN_INFO.orig_x = x; 304 real_mode->screen_info.orig_x = x;
306 RM_SCREEN_INFO.orig_y = y; 305 real_mode->screen_info.orig_y = y;
307 306
308 pos = (x + cols * y) * 2; /* Update cursor position */ 307 pos = (x + cols * y) * 2; /* Update cursor position */
309 outb(14, vidport); 308 outb(14, vidport);
@@ -366,9 +365,9 @@ static void flush_window(void)
366 365
367static void error(char *x) 366static void error(char *x)
368{ 367{
369 putstr("\n\n"); 368 __putstr(1, "\n\n");
370 putstr(x); 369 __putstr(1, x);
371 putstr("\n\n -- System halted"); 370 __putstr(1, "\n\n -- System halted");
372 371
373 while (1) 372 while (1)
374 asm("hlt"); 373 asm("hlt");
@@ -395,7 +394,8 @@ static void parse_elf(void *output)
395 return; 394 return;
396 } 395 }
397 396
398 putstr("Parsing ELF... "); 397 if (!quiet)
398 putstr("Parsing ELF... ");
399 399
400 phdrs = malloc(sizeof(*phdrs) * ehdr.e_phnum); 400 phdrs = malloc(sizeof(*phdrs) * ehdr.e_phnum);
401 if (!phdrs) 401 if (!phdrs)
@@ -430,7 +430,10 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap,
430{ 430{
431 real_mode = rmode; 431 real_mode = rmode;
432 432
433 if (RM_SCREEN_INFO.orig_video_mode == 7) { 433 if (real_mode->hdr.loadflags & QUIET_FLAG)
434 quiet = 1;
435
436 if (real_mode->screen_info.orig_video_mode == 7) {
434 vidmem = (char *) 0xb0000; 437 vidmem = (char *) 0xb0000;
435 vidport = 0x3b4; 438 vidport = 0x3b4;
436 } else { 439 } else {
@@ -438,8 +441,8 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap,
438 vidport = 0x3d4; 441 vidport = 0x3d4;
439 } 442 }
440 443
441 lines = RM_SCREEN_INFO.orig_video_lines; 444 lines = real_mode->screen_info.orig_video_lines;
442 cols = RM_SCREEN_INFO.orig_video_cols; 445 cols = real_mode->screen_info.orig_video_cols;
443 446
444 window = output; /* Output buffer (Normally at 1M) */ 447 window = output; /* Output buffer (Normally at 1M) */
445 free_mem_ptr = heap; /* Heap */ 448 free_mem_ptr = heap; /* Heap */
@@ -465,9 +468,11 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap,
465#endif 468#endif
466 469
467 makecrc(); 470 makecrc();
468 putstr("\nDecompressing Linux... "); 471 if (!quiet)
472 putstr("\nDecompressing Linux... ");
469 gunzip(); 473 gunzip();
470 parse_elf(output); 474 parse_elf(output);
471 putstr("done.\nBooting the kernel.\n"); 475 if (!quiet)
476 putstr("done.\nBooting the kernel.\n");
472 return; 477 return;
473} 478}
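Two independent gates now control decompressor output; in summary (behavior as read from the hunks above):

	/* compile time: with CONFIG_X86_VERBOSE_BOOTUP unset, __putstr(0, ...)
	 * returns immediately, so the putstr() macro becomes a no-op;
	 * run time: the explicit "if (!quiet)" checks skip progress messages
	 * when QUIET_FLAG was set from the "quiet" command-line option.
	 * error() calls __putstr(1, ...), so failures print in either case
	 * before the machine halts. */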
diff --git a/arch/x86/boot/compressed/relocs.c b/arch/x86/boot/compressed/relocs.c
index edaadea90aaf..a1310c52fc0c 100644
--- a/arch/x86/boot/compressed/relocs.c
+++ b/arch/x86/boot/compressed/relocs.c
@@ -10,16 +10,20 @@
10#define USE_BSD 10#define USE_BSD
11#include <endian.h> 11#include <endian.h>
12 12
13#define MAX_SHDRS 100
14#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) 13#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
15static Elf32_Ehdr ehdr; 14static Elf32_Ehdr ehdr;
16static Elf32_Shdr shdr[MAX_SHDRS];
17static Elf32_Sym *symtab[MAX_SHDRS];
18static Elf32_Rel *reltab[MAX_SHDRS];
19static char *strtab[MAX_SHDRS];
20static unsigned long reloc_count, reloc_idx; 15static unsigned long reloc_count, reloc_idx;
21static unsigned long *relocs; 16static unsigned long *relocs;
22 17
18struct section {
19 Elf32_Shdr shdr;
20 struct section *link;
21 Elf32_Sym *symtab;
22 Elf32_Rel *reltab;
23 char *strtab;
24};
25static struct section *secs;
26
23/* 27/*
 24 * The following symbols have been audited. Their values are constant and do	 28 * The following symbols have been audited. Their values are constant and do
25 * not change if bzImage is loaded at a different physical address than 29 * not change if bzImage is loaded at a different physical address than
@@ -35,7 +39,7 @@ static int is_safe_abs_reloc(const char* sym_name)
35{ 39{
36 int i; 40 int i;
37 41
38 for(i = 0; i < ARRAY_SIZE(safe_abs_relocs); i++) { 42 for (i = 0; i < ARRAY_SIZE(safe_abs_relocs); i++) {
39 if (!strcmp(sym_name, safe_abs_relocs[i])) 43 if (!strcmp(sym_name, safe_abs_relocs[i]))
40 /* Match found */ 44 /* Match found */
41 return 1; 45 return 1;
@@ -137,10 +141,10 @@ static const char *sec_name(unsigned shndx)
137{ 141{
138 const char *sec_strtab; 142 const char *sec_strtab;
139 const char *name; 143 const char *name;
140 sec_strtab = strtab[ehdr.e_shstrndx]; 144 sec_strtab = secs[ehdr.e_shstrndx].strtab;
141 name = "<noname>"; 145 name = "<noname>";
142 if (shndx < ehdr.e_shnum) { 146 if (shndx < ehdr.e_shnum) {
143 name = sec_strtab + shdr[shndx].sh_name; 147 name = sec_strtab + secs[shndx].shdr.sh_name;
144 } 148 }
145 else if (shndx == SHN_ABS) { 149 else if (shndx == SHN_ABS) {
146 name = "ABSOLUTE"; 150 name = "ABSOLUTE";
@@ -159,7 +163,7 @@ static const char *sym_name(const char *sym_strtab, Elf32_Sym *sym)
159 name = sym_strtab + sym->st_name; 163 name = sym_strtab + sym->st_name;
160 } 164 }
161 else { 165 else {
162 name = sec_name(shdr[sym->st_shndx].sh_name); 166 name = sec_name(secs[sym->st_shndx].shdr.sh_name);
163 } 167 }
164 return name; 168 return name;
165} 169}
@@ -244,29 +248,34 @@ static void read_ehdr(FILE *fp)
244static void read_shdrs(FILE *fp) 248static void read_shdrs(FILE *fp)
245{ 249{
246 int i; 250 int i;
247 if (ehdr.e_shnum > MAX_SHDRS) { 251 Elf32_Shdr shdr;
248 die("%d section headers supported: %d\n", 252
249 ehdr.e_shnum, MAX_SHDRS); 253 secs = calloc(ehdr.e_shnum, sizeof(struct section));
254 if (!secs) {
255 die("Unable to allocate %d section headers\n",
256 ehdr.e_shnum);
250 } 257 }
251 if (fseek(fp, ehdr.e_shoff, SEEK_SET) < 0) { 258 if (fseek(fp, ehdr.e_shoff, SEEK_SET) < 0) {
252 die("Seek to %d failed: %s\n", 259 die("Seek to %d failed: %s\n",
253 ehdr.e_shoff, strerror(errno)); 260 ehdr.e_shoff, strerror(errno));
254 } 261 }
255 if (fread(&shdr, sizeof(shdr[0]), ehdr.e_shnum, fp) != ehdr.e_shnum) { 262 for (i = 0; i < ehdr.e_shnum; i++) {
256 die("Cannot read ELF section headers: %s\n", 263 struct section *sec = &secs[i];
257 strerror(errno)); 264 if (fread(&shdr, sizeof shdr, 1, fp) != 1)
258 } 265 die("Cannot read ELF section headers %d/%d: %s\n",
259 for(i = 0; i < ehdr.e_shnum; i++) { 266 i, ehdr.e_shnum, strerror(errno));
260 shdr[i].sh_name = elf32_to_cpu(shdr[i].sh_name); 267 sec->shdr.sh_name = elf32_to_cpu(shdr.sh_name);
261 shdr[i].sh_type = elf32_to_cpu(shdr[i].sh_type); 268 sec->shdr.sh_type = elf32_to_cpu(shdr.sh_type);
262 shdr[i].sh_flags = elf32_to_cpu(shdr[i].sh_flags); 269 sec->shdr.sh_flags = elf32_to_cpu(shdr.sh_flags);
263 shdr[i].sh_addr = elf32_to_cpu(shdr[i].sh_addr); 270 sec->shdr.sh_addr = elf32_to_cpu(shdr.sh_addr);
264 shdr[i].sh_offset = elf32_to_cpu(shdr[i].sh_offset); 271 sec->shdr.sh_offset = elf32_to_cpu(shdr.sh_offset);
265 shdr[i].sh_size = elf32_to_cpu(shdr[i].sh_size); 272 sec->shdr.sh_size = elf32_to_cpu(shdr.sh_size);
266 shdr[i].sh_link = elf32_to_cpu(shdr[i].sh_link); 273 sec->shdr.sh_link = elf32_to_cpu(shdr.sh_link);
267 shdr[i].sh_info = elf32_to_cpu(shdr[i].sh_info); 274 sec->shdr.sh_info = elf32_to_cpu(shdr.sh_info);
268 shdr[i].sh_addralign = elf32_to_cpu(shdr[i].sh_addralign); 275 sec->shdr.sh_addralign = elf32_to_cpu(shdr.sh_addralign);
269 shdr[i].sh_entsize = elf32_to_cpu(shdr[i].sh_entsize); 276 sec->shdr.sh_entsize = elf32_to_cpu(shdr.sh_entsize);
277 if (sec->shdr.sh_link < ehdr.e_shnum)
278 sec->link = &secs[sec->shdr.sh_link];
270 } 279 }
271 280
272} 281}
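With the fixed-size parallel arrays gone, the later functions walk the heap-allocated section table and follow the cached link pointer; a sketch of the resulting pattern (the loop and the printf are illustrative):

	int i;

	for (i = 0; i < ehdr.e_shnum; i++) {
		struct section *sec = &secs[i];

		if (sec->shdr.sh_type != SHT_SYMTAB)
			continue;
		/* sec->link was filled in by read_shdrs(); it replaces the
		 * old strtab[shdr[i].sh_link] double lookup */
		printf("first symbol: %s\n",
		       sec->link->strtab + sec->symtab[0].st_name);
	}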
@@ -274,20 +283,22 @@ static void read_shdrs(FILE *fp)
274static void read_strtabs(FILE *fp) 283static void read_strtabs(FILE *fp)
275{ 284{
276 int i; 285 int i;
277 for(i = 0; i < ehdr.e_shnum; i++) { 286 for (i = 0; i < ehdr.e_shnum; i++) {
278 if (shdr[i].sh_type != SHT_STRTAB) { 287 struct section *sec = &secs[i];
288 if (sec->shdr.sh_type != SHT_STRTAB) {
279 continue; 289 continue;
280 } 290 }
281 strtab[i] = malloc(shdr[i].sh_size); 291 sec->strtab = malloc(sec->shdr.sh_size);
282 if (!strtab[i]) { 292 if (!sec->strtab) {
283 die("malloc of %d bytes for strtab failed\n", 293 die("malloc of %d bytes for strtab failed\n",
284 shdr[i].sh_size); 294 sec->shdr.sh_size);
285 } 295 }
286 if (fseek(fp, shdr[i].sh_offset, SEEK_SET) < 0) { 296 if (fseek(fp, sec->shdr.sh_offset, SEEK_SET) < 0) {
287 die("Seek to %d failed: %s\n", 297 die("Seek to %d failed: %s\n",
288 shdr[i].sh_offset, strerror(errno)); 298 sec->shdr.sh_offset, strerror(errno));
289 } 299 }
290 if (fread(strtab[i], 1, shdr[i].sh_size, fp) != shdr[i].sh_size) { 300 if (fread(sec->strtab, 1, sec->shdr.sh_size, fp)
301 != sec->shdr.sh_size) {
291 die("Cannot read symbol table: %s\n", 302 die("Cannot read symbol table: %s\n",
292 strerror(errno)); 303 strerror(errno));
293 } 304 }
@@ -297,28 +308,31 @@ static void read_strtabs(FILE *fp)
297static void read_symtabs(FILE *fp) 308static void read_symtabs(FILE *fp)
298{ 309{
299 int i,j; 310 int i,j;
300 for(i = 0; i < ehdr.e_shnum; i++) { 311 for (i = 0; i < ehdr.e_shnum; i++) {
301 if (shdr[i].sh_type != SHT_SYMTAB) { 312 struct section *sec = &secs[i];
313 if (sec->shdr.sh_type != SHT_SYMTAB) {
302 continue; 314 continue;
303 } 315 }
304 symtab[i] = malloc(shdr[i].sh_size); 316 sec->symtab = malloc(sec->shdr.sh_size);
305 if (!symtab[i]) { 317 if (!sec->symtab) {
306 die("malloc of %d bytes for symtab failed\n", 318 die("malloc of %d bytes for symtab failed\n",
307 shdr[i].sh_size); 319 sec->shdr.sh_size);
308 } 320 }
309 if (fseek(fp, shdr[i].sh_offset, SEEK_SET) < 0) { 321 if (fseek(fp, sec->shdr.sh_offset, SEEK_SET) < 0) {
310 die("Seek to %d failed: %s\n", 322 die("Seek to %d failed: %s\n",
311 shdr[i].sh_offset, strerror(errno)); 323 sec->shdr.sh_offset, strerror(errno));
312 } 324 }
313 if (fread(symtab[i], 1, shdr[i].sh_size, fp) != shdr[i].sh_size) { 325 if (fread(sec->symtab, 1, sec->shdr.sh_size, fp)
326 != sec->shdr.sh_size) {
314 die("Cannot read symbol table: %s\n", 327 die("Cannot read symbol table: %s\n",
315 strerror(errno)); 328 strerror(errno));
316 } 329 }
317 for(j = 0; j < shdr[i].sh_size/sizeof(symtab[i][0]); j++) { 330 for (j = 0; j < sec->shdr.sh_size/sizeof(Elf32_Sym); j++) {
318 symtab[i][j].st_name = elf32_to_cpu(symtab[i][j].st_name); 331 Elf32_Sym *sym = &sec->symtab[j];
319 symtab[i][j].st_value = elf32_to_cpu(symtab[i][j].st_value); 332 sym->st_name = elf32_to_cpu(sym->st_name);
320 symtab[i][j].st_size = elf32_to_cpu(symtab[i][j].st_size); 333 sym->st_value = elf32_to_cpu(sym->st_value);
321 symtab[i][j].st_shndx = elf16_to_cpu(symtab[i][j].st_shndx); 334 sym->st_size = elf32_to_cpu(sym->st_size);
335 sym->st_shndx = elf16_to_cpu(sym->st_shndx);
322 } 336 }
323 } 337 }
324} 338}
@@ -327,26 +341,29 @@ static void read_symtabs(FILE *fp)
327static void read_relocs(FILE *fp) 341static void read_relocs(FILE *fp)
328{ 342{
329 int i,j; 343 int i,j;
330 for(i = 0; i < ehdr.e_shnum; i++) { 344 for (i = 0; i < ehdr.e_shnum; i++) {
331 if (shdr[i].sh_type != SHT_REL) { 345 struct section *sec = &secs[i];
346 if (sec->shdr.sh_type != SHT_REL) {
332 continue; 347 continue;
333 } 348 }
334 reltab[i] = malloc(shdr[i].sh_size); 349 sec->reltab = malloc(sec->shdr.sh_size);
335 if (!reltab[i]) { 350 if (!sec->reltab) {
336 die("malloc of %d bytes for relocs failed\n", 351 die("malloc of %d bytes for relocs failed\n",
337 shdr[i].sh_size); 352 sec->shdr.sh_size);
338 } 353 }
339 if (fseek(fp, shdr[i].sh_offset, SEEK_SET) < 0) { 354 if (fseek(fp, sec->shdr.sh_offset, SEEK_SET) < 0) {
340 die("Seek to %d failed: %s\n", 355 die("Seek to %d failed: %s\n",
341 shdr[i].sh_offset, strerror(errno)); 356 sec->shdr.sh_offset, strerror(errno));
342 } 357 }
343 if (fread(reltab[i], 1, shdr[i].sh_size, fp) != shdr[i].sh_size) { 358 if (fread(sec->reltab, 1, sec->shdr.sh_size, fp)
359 != sec->shdr.sh_size) {
344 die("Cannot read symbol table: %s\n", 360 die("Cannot read symbol table: %s\n",
345 strerror(errno)); 361 strerror(errno));
346 } 362 }
347 for(j = 0; j < shdr[i].sh_size/sizeof(reltab[0][0]); j++) { 363 for (j = 0; j < sec->shdr.sh_size/sizeof(Elf32_Rel); j++) {
348 reltab[i][j].r_offset = elf32_to_cpu(reltab[i][j].r_offset); 364 Elf32_Rel *rel = &sec->reltab[j];
349 reltab[i][j].r_info = elf32_to_cpu(reltab[i][j].r_info); 365 rel->r_offset = elf32_to_cpu(rel->r_offset);
366 rel->r_info = elf32_to_cpu(rel->r_info);
350 } 367 }
351 } 368 }
352} 369}
@@ -357,19 +374,21 @@ static void print_absolute_symbols(void)
357 int i; 374 int i;
358 printf("Absolute symbols\n"); 375 printf("Absolute symbols\n");
359 printf(" Num: Value Size Type Bind Visibility Name\n"); 376 printf(" Num: Value Size Type Bind Visibility Name\n");
360 for(i = 0; i < ehdr.e_shnum; i++) { 377 for (i = 0; i < ehdr.e_shnum; i++) {
378 struct section *sec = &secs[i];
361 char *sym_strtab; 379 char *sym_strtab;
362 Elf32_Sym *sh_symtab; 380 Elf32_Sym *sh_symtab;
363 int j; 381 int j;
364 if (shdr[i].sh_type != SHT_SYMTAB) { 382
383 if (sec->shdr.sh_type != SHT_SYMTAB) {
365 continue; 384 continue;
366 } 385 }
367 sh_symtab = symtab[i]; 386 sh_symtab = sec->symtab;
368 sym_strtab = strtab[shdr[i].sh_link]; 387 sym_strtab = sec->link->strtab;
369 for(j = 0; j < shdr[i].sh_size/sizeof(symtab[0][0]); j++) { 388 for (j = 0; j < sec->shdr.sh_size/sizeof(Elf32_Sym); j++) {
370 Elf32_Sym *sym; 389 Elf32_Sym *sym;
371 const char *name; 390 const char *name;
372 sym = &symtab[i][j]; 391 sym = &sec->symtab[j];
373 name = sym_name(sym_strtab, sym); 392 name = sym_name(sym_strtab, sym);
374 if (sym->st_shndx != SHN_ABS) { 393 if (sym->st_shndx != SHN_ABS) {
375 continue; 394 continue;
@@ -389,26 +408,27 @@ static void print_absolute_relocs(void)
389{ 408{
390 int i, printed = 0; 409 int i, printed = 0;
391 410
392 for(i = 0; i < ehdr.e_shnum; i++) { 411 for (i = 0; i < ehdr.e_shnum; i++) {
412 struct section *sec = &secs[i];
413 struct section *sec_applies, *sec_symtab;
393 char *sym_strtab; 414 char *sym_strtab;
394 Elf32_Sym *sh_symtab; 415 Elf32_Sym *sh_symtab;
395 unsigned sec_applies, sec_symtab;
396 int j; 416 int j;
397 if (shdr[i].sh_type != SHT_REL) { 417 if (sec->shdr.sh_type != SHT_REL) {
398 continue; 418 continue;
399 } 419 }
400 sec_symtab = shdr[i].sh_link; 420 sec_symtab = sec->link;
401 sec_applies = shdr[i].sh_info; 421 sec_applies = &secs[sec->shdr.sh_info];
402 if (!(shdr[sec_applies].sh_flags & SHF_ALLOC)) { 422 if (!(sec_applies->shdr.sh_flags & SHF_ALLOC)) {
403 continue; 423 continue;
404 } 424 }
405 sh_symtab = symtab[sec_symtab]; 425 sh_symtab = sec_symtab->symtab;
406 sym_strtab = strtab[shdr[sec_symtab].sh_link]; 426 sym_strtab = sec_symtab->link->strtab;
407 for(j = 0; j < shdr[i].sh_size/sizeof(reltab[0][0]); j++) { 427 for (j = 0; j < sec->shdr.sh_size/sizeof(Elf32_Rel); j++) {
408 Elf32_Rel *rel; 428 Elf32_Rel *rel;
409 Elf32_Sym *sym; 429 Elf32_Sym *sym;
410 const char *name; 430 const char *name;
411 rel = &reltab[i][j]; 431 rel = &sec->reltab[j];
412 sym = &sh_symtab[ELF32_R_SYM(rel->r_info)]; 432 sym = &sh_symtab[ELF32_R_SYM(rel->r_info)];
413 name = sym_name(sym_strtab, sym); 433 name = sym_name(sym_strtab, sym);
414 if (sym->st_shndx != SHN_ABS) { 434 if (sym->st_shndx != SHN_ABS) {
@@ -456,26 +476,28 @@ static void walk_relocs(void (*visit)(Elf32_Rel *rel, Elf32_Sym *sym))
456{ 476{
457 int i; 477 int i;
458 /* Walk through the relocations */ 478 /* Walk through the relocations */
459 for(i = 0; i < ehdr.e_shnum; i++) { 479 for (i = 0; i < ehdr.e_shnum; i++) {
460 char *sym_strtab; 480 char *sym_strtab;
461 Elf32_Sym *sh_symtab; 481 Elf32_Sym *sh_symtab;
462 unsigned sec_applies, sec_symtab; 482 struct section *sec_applies, *sec_symtab;
463 int j; 483 int j;
464 if (shdr[i].sh_type != SHT_REL) { 484 struct section *sec = &secs[i];
485
486 if (sec->shdr.sh_type != SHT_REL) {
465 continue; 487 continue;
466 } 488 }
467 sec_symtab = shdr[i].sh_link; 489 sec_symtab = sec->link;
468 sec_applies = shdr[i].sh_info; 490 sec_applies = &secs[sec->shdr.sh_info];
469 if (!(shdr[sec_applies].sh_flags & SHF_ALLOC)) { 491 if (!(sec_applies->shdr.sh_flags & SHF_ALLOC)) {
470 continue; 492 continue;
471 } 493 }
472 sh_symtab = symtab[sec_symtab]; 494 sh_symtab = sec_symtab->symtab;
473 sym_strtab = strtab[shdr[sec_symtab].sh_link]; 495 sym_strtab = sec->link->strtab;
474 for(j = 0; j < shdr[i].sh_size/sizeof(reltab[0][0]); j++) { 496 for (j = 0; j < sec->shdr.sh_size/sizeof(Elf32_Rel); j++) {
475 Elf32_Rel *rel; 497 Elf32_Rel *rel;
476 Elf32_Sym *sym; 498 Elf32_Sym *sym;
477 unsigned r_type; 499 unsigned r_type;
478 rel = &reltab[i][j]; 500 rel = &sec->reltab[j];
479 sym = &sh_symtab[ELF32_R_SYM(rel->r_info)]; 501 sym = &sh_symtab[ELF32_R_SYM(rel->r_info)];
480 r_type = ELF32_R_TYPE(rel->r_info); 502 r_type = ELF32_R_TYPE(rel->r_info);
481 /* Don't visit relocations to absolute symbols */ 503 /* Don't visit relocations to absolute symbols */
@@ -539,7 +561,7 @@ static void emit_relocs(int as_text)
539 */ 561 */
540 printf(".section \".data.reloc\",\"a\"\n"); 562 printf(".section \".data.reloc\",\"a\"\n");
541 printf(".balign 4\n"); 563 printf(".balign 4\n");
542 for(i = 0; i < reloc_count; i++) { 564 for (i = 0; i < reloc_count; i++) {
543 printf("\t .long 0x%08lx\n", relocs[i]); 565 printf("\t .long 0x%08lx\n", relocs[i]);
544 } 566 }
545 printf("\n"); 567 printf("\n");
@@ -550,7 +572,7 @@ static void emit_relocs(int as_text)
550 /* Print a stop */ 572 /* Print a stop */
551 printf("%c%c%c%c", buf[0], buf[1], buf[2], buf[3]); 573 printf("%c%c%c%c", buf[0], buf[1], buf[2], buf[3]);
552 /* Now print each relocation */ 574 /* Now print each relocation */
553 for(i = 0; i < reloc_count; i++) { 575 for (i = 0; i < reloc_count; i++) {
554 buf[0] = (relocs[i] >> 0) & 0xff; 576 buf[0] = (relocs[i] >> 0) & 0xff;
555 buf[1] = (relocs[i] >> 8) & 0xff; 577 buf[1] = (relocs[i] >> 8) & 0xff;
556 buf[2] = (relocs[i] >> 16) & 0xff; 578 buf[2] = (relocs[i] >> 16) & 0xff;
@@ -577,7 +599,7 @@ int main(int argc, char **argv)
577 show_absolute_relocs = 0; 599 show_absolute_relocs = 0;
578 as_text = 0; 600 as_text = 0;
579 fname = NULL; 601 fname = NULL;
580 for(i = 1; i < argc; i++) { 602 for (i = 1; i < argc; i++) {
581 char *arg = argv[i]; 603 char *arg = argv[i];
582 if (*arg == '-') { 604 if (*arg == '-') {
583 if (strcmp(argv[1], "--abs-syms") == 0) { 605 if (strcmp(argv[1], "--abs-syms") == 0) {
diff --git a/arch/x86/boot/cpu.c b/arch/x86/boot/cpu.c
index 00e19edd852c..92d6fd73dc7d 100644
--- a/arch/x86/boot/cpu.c
+++ b/arch/x86/boot/cpu.c
@@ -28,6 +28,8 @@ static char *cpu_name(int level)
28 if (level == 64) { 28 if (level == 64) {
29 return "x86-64"; 29 return "x86-64";
30 } else { 30 } else {
31 if (level == 15)
32 level = 6;
31 sprintf(buf, "i%d86", level); 33 sprintf(buf, "i%d86", level);
32 return buf; 34 return buf;
33 } 35 }
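A worked example of the two added lines, given the sprintf(buf, "i%d86", level) fallback visible above:

	/* before: cpu_name(15) -> "i1586" ("i%d86" formatted with 15)
	 * after:  cpu_name(15) -> "i686", the conventional name for
	 *         family-15 (Pentium 4 class) parts */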
diff --git a/arch/x86/boot/main.c b/arch/x86/boot/main.c
index 77569a4a3be1..2296164b54d2 100644
--- a/arch/x86/boot/main.c
+++ b/arch/x86/boot/main.c
@@ -165,6 +165,10 @@ void main(void)
165 /* Set the video mode */ 165 /* Set the video mode */
166 set_video(); 166 set_video();
167 167
168 /* Parse command line for 'quiet' and pass it to decompressor. */
169 if (cmdline_find_option_bool("quiet"))
170 boot_params.hdr.loadflags |= QUIET_FLAG;
171
168 /* Do the last things and invoke protected mode */ 172 /* Do the last things and invoke protected mode */
169 go_to_protected_mode(); 173 go_to_protected_mode();
170} 174}
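The user-visible effect of the new check (QUIET_FLAG is assumed to be a loadflags bit from <asm/bootparam.h>, the same bit read by the misc.c hunks earlier):

	/* booting with e.g. "vmlinuz root=/dev/sda1 quiet" makes the
	 * real-mode setup set QUIET_FLAG in boot_params.hdr.loadflags;
	 * the decompressor then suppresses its "Decompressing Linux..."
	 * progress messages while still printing errors. */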
diff --git a/arch/x86/boot/memory.c b/arch/x86/boot/memory.c
index acad32eb4290..53165c97336b 100644
--- a/arch/x86/boot/memory.c
+++ b/arch/x86/boot/memory.c
@@ -13,6 +13,7 @@
13 */ 13 */
14 14
15#include "boot.h" 15#include "boot.h"
16#include <linux/kernel.h>
16 17
17#define SMAP 0x534d4150 /* ASCII "SMAP" */ 18#define SMAP 0x534d4150 /* ASCII "SMAP" */
18 19
@@ -53,7 +54,7 @@ static int detect_memory_e820(void)
53 54
54 count++; 55 count++;
55 desc++; 56 desc++;
56 } while (next && count < E820MAX); 57 } while (next && count < ARRAY_SIZE(boot_params.e820_map));
57 58
58 return boot_params.e820_entries = count; 59 return boot_params.e820_entries = count;
59} 60}
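The loop bound now tracks the declared size of boot_params.e820_map itself instead of the separate E820MAX constant; <linux/kernel.h> provides the helper, essentially:

	#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))

	/* so growing the e820_map array automatically widens the
	 * detection loop's bound */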
diff --git a/arch/x86/boot/pmjump.S b/arch/x86/boot/pmjump.S
index ab049d40a884..141b6e20ed31 100644
--- a/arch/x86/boot/pmjump.S
+++ b/arch/x86/boot/pmjump.S
@@ -33,6 +33,8 @@ protected_mode_jump:
33 movw %cs, %bx 33 movw %cs, %bx
34 shll $4, %ebx 34 shll $4, %ebx
35 addl %ebx, 2f 35 addl %ebx, 2f
36 jmp 1f # Short jump to serialize on 386/486
371:
36 38
37 movw $__BOOT_DS, %cx 39 movw $__BOOT_DS, %cx
38 movw $__BOOT_TSS, %di 40 movw $__BOOT_TSS, %di
@@ -40,8 +42,6 @@ protected_mode_jump:
40 movl %cr0, %edx 42 movl %cr0, %edx
41 orb $X86_CR0_PE, %dl # Protected mode 43 orb $X86_CR0_PE, %dl # Protected mode
42 movl %edx, %cr0 44 movl %edx, %cr0
43 jmp 1f # Short jump to serialize on 386/486
441:
45 45
46 # Transition to 32-bit mode 46 # Transition to 32-bit mode
47 .byte 0x66, 0xea # ljmpl opcode 47 .byte 0x66, 0xea # ljmpl opcode
diff --git a/arch/x86/boot/video-vga.c b/arch/x86/boot/video-vga.c
index 40ecb8d7688c..b939cb476dec 100644
--- a/arch/x86/boot/video-vga.c
+++ b/arch/x86/boot/video-vga.c
@@ -259,8 +259,7 @@ static int vga_probe(void)
259 return mode_count[adapter]; 259 return mode_count[adapter];
260} 260}
261 261
262__videocard video_vga = 262__videocard video_vga = {
263{
264 .card_name = "VGA", 263 .card_name = "VGA",
265 .probe = vga_probe, 264 .probe = vga_probe,
266 .set_mode = vga_set_mode, 265 .set_mode = vga_set_mode,
diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index ad7ddaaff588..9bc34e2033ec 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -1,54 +1,103 @@
1# 1#
2# Automatically generated make config: don't edit 2# Automatically generated make config: don't edit
3# Linux kernel version: 2.6.22-git14 3# Linux kernel version: 2.6.26-rc1
4# Fri Jul 20 09:53:15 2007 4# Sun May 4 19:59:02 2008
5# 5#
6# CONFIG_64BIT is not set
6CONFIG_X86_32=y 7CONFIG_X86_32=y
8# CONFIG_X86_64 is not set
9CONFIG_X86=y
10CONFIG_DEFCONFIG_LIST="arch/x86/configs/i386_defconfig"
11# CONFIG_GENERIC_LOCKBREAK is not set
7CONFIG_GENERIC_TIME=y 12CONFIG_GENERIC_TIME=y
13CONFIG_GENERIC_CMOS_UPDATE=y
8CONFIG_CLOCKSOURCE_WATCHDOG=y 14CONFIG_CLOCKSOURCE_WATCHDOG=y
9CONFIG_GENERIC_CLOCKEVENTS=y 15CONFIG_GENERIC_CLOCKEVENTS=y
10CONFIG_GENERIC_CLOCKEVENTS_BROADCAST=y 16CONFIG_GENERIC_CLOCKEVENTS_BROADCAST=y
11CONFIG_LOCKDEP_SUPPORT=y 17CONFIG_LOCKDEP_SUPPORT=y
12CONFIG_STACKTRACE_SUPPORT=y 18CONFIG_STACKTRACE_SUPPORT=y
13CONFIG_SEMAPHORE_SLEEPERS=y 19CONFIG_HAVE_LATENCYTOP_SUPPORT=y
14CONFIG_X86=y 20CONFIG_FAST_CMPXCHG_LOCAL=y
15CONFIG_MMU=y 21CONFIG_MMU=y
16CONFIG_ZONE_DMA=y 22CONFIG_ZONE_DMA=y
17CONFIG_QUICKLIST=y
18CONFIG_GENERIC_ISA_DMA=y 23CONFIG_GENERIC_ISA_DMA=y
19CONFIG_GENERIC_IOMAP=y 24CONFIG_GENERIC_IOMAP=y
20CONFIG_GENERIC_BUG=y 25CONFIG_GENERIC_BUG=y
21CONFIG_GENERIC_HWEIGHT=y 26CONFIG_GENERIC_HWEIGHT=y
27# CONFIG_GENERIC_GPIO is not set
22CONFIG_ARCH_MAY_HAVE_PC_FDC=y 28CONFIG_ARCH_MAY_HAVE_PC_FDC=y
23CONFIG_DMI=y 29# CONFIG_RWSEM_GENERIC_SPINLOCK is not set
24CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" 30CONFIG_RWSEM_XCHGADD_ALGORITHM=y
31# CONFIG_ARCH_HAS_ILOG2_U32 is not set
32# CONFIG_ARCH_HAS_ILOG2_U64 is not set
33CONFIG_ARCH_HAS_CPU_IDLE_WAIT=y
34CONFIG_GENERIC_CALIBRATE_DELAY=y
35# CONFIG_GENERIC_TIME_VSYSCALL is not set
36CONFIG_ARCH_HAS_CPU_RELAX=y
37CONFIG_ARCH_HAS_CACHE_LINE_SIZE=y
38CONFIG_HAVE_SETUP_PER_CPU_AREA=y
39# CONFIG_HAVE_CPUMASK_OF_CPU_MAP is not set
40CONFIG_ARCH_HIBERNATION_POSSIBLE=y
41CONFIG_ARCH_SUSPEND_POSSIBLE=y
42# CONFIG_ZONE_DMA32 is not set
43CONFIG_ARCH_POPULATES_NODE_MAP=y
44# CONFIG_AUDIT_ARCH is not set
45CONFIG_ARCH_SUPPORTS_AOUT=y
46CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING=y
47CONFIG_GENERIC_HARDIRQS=y
48CONFIG_GENERIC_IRQ_PROBE=y
49CONFIG_GENERIC_PENDING_IRQ=y
50CONFIG_X86_SMP=y
51CONFIG_X86_32_SMP=y
52CONFIG_X86_HT=y
53CONFIG_X86_BIOS_REBOOT=y
54CONFIG_X86_TRAMPOLINE=y
55CONFIG_KTIME_SCALAR=y
25 56
26# 57#
27# Code maturity level options 58# General setup
28# 59#
29CONFIG_EXPERIMENTAL=y 60CONFIG_EXPERIMENTAL=y
30CONFIG_LOCK_KERNEL=y 61CONFIG_LOCK_KERNEL=y
31CONFIG_INIT_ENV_ARG_LIMIT=32 62CONFIG_INIT_ENV_ARG_LIMIT=32
32
33#
34# General setup
35#
36CONFIG_LOCALVERSION="" 63CONFIG_LOCALVERSION=""
37CONFIG_LOCALVERSION_AUTO=y 64# CONFIG_LOCALVERSION_AUTO is not set
38CONFIG_SWAP=y 65CONFIG_SWAP=y
39CONFIG_SYSVIPC=y 66CONFIG_SYSVIPC=y
40CONFIG_SYSVIPC_SYSCTL=y 67CONFIG_SYSVIPC_SYSCTL=y
41CONFIG_POSIX_MQUEUE=y 68CONFIG_POSIX_MQUEUE=y
42# CONFIG_BSD_PROCESS_ACCT is not set 69CONFIG_BSD_PROCESS_ACCT=y
43# CONFIG_TASKSTATS is not set 70# CONFIG_BSD_PROCESS_ACCT_V3 is not set
44# CONFIG_USER_NS is not set 71CONFIG_TASKSTATS=y
45# CONFIG_AUDIT is not set 72CONFIG_TASK_DELAY_ACCT=y
46CONFIG_IKCONFIG=y 73CONFIG_TASK_XACCT=y
47CONFIG_IKCONFIG_PROC=y 74CONFIG_TASK_IO_ACCOUNTING=y
48CONFIG_LOG_BUF_SHIFT=18 75CONFIG_AUDIT=y
49# CONFIG_CPUSETS is not set 76CONFIG_AUDITSYSCALL=y
50CONFIG_SYSFS_DEPRECATED=y 77CONFIG_AUDIT_TREE=y
78# CONFIG_IKCONFIG is not set
79CONFIG_LOG_BUF_SHIFT=17
80CONFIG_CGROUPS=y
81# CONFIG_CGROUP_DEBUG is not set
82CONFIG_CGROUP_NS=y
83# CONFIG_CGROUP_DEVICE is not set
84CONFIG_CPUSETS=y
85CONFIG_GROUP_SCHED=y
86CONFIG_FAIR_GROUP_SCHED=y
87# CONFIG_RT_GROUP_SCHED is not set
88# CONFIG_USER_SCHED is not set
89CONFIG_CGROUP_SCHED=y
90CONFIG_CGROUP_CPUACCT=y
91CONFIG_RESOURCE_COUNTERS=y
92# CONFIG_CGROUP_MEM_RES_CTLR is not set
93# CONFIG_SYSFS_DEPRECATED_V2 is not set
94CONFIG_PROC_PID_CPUSET=y
51CONFIG_RELAY=y 95CONFIG_RELAY=y
96CONFIG_NAMESPACES=y
97CONFIG_UTS_NS=y
98CONFIG_IPC_NS=y
99CONFIG_USER_NS=y
100CONFIG_PID_NS=y
52CONFIG_BLK_DEV_INITRD=y 101CONFIG_BLK_DEV_INITRD=y
53CONFIG_INITRAMFS_SOURCE="" 102CONFIG_INITRAMFS_SOURCE=""
54CONFIG_CC_OPTIMIZE_FOR_SIZE=y 103CONFIG_CC_OPTIMIZE_FOR_SIZE=y
@@ -56,13 +105,15 @@ CONFIG_SYSCTL=y
56# CONFIG_EMBEDDED is not set 105# CONFIG_EMBEDDED is not set
57CONFIG_UID16=y 106CONFIG_UID16=y
58CONFIG_SYSCTL_SYSCALL=y 107CONFIG_SYSCTL_SYSCALL=y
108CONFIG_SYSCTL_SYSCALL_CHECK=y
59CONFIG_KALLSYMS=y 109CONFIG_KALLSYMS=y
60CONFIG_KALLSYMS_ALL=y 110CONFIG_KALLSYMS_ALL=y
61# CONFIG_KALLSYMS_EXTRA_PASS is not set 111CONFIG_KALLSYMS_EXTRA_PASS=y
62CONFIG_HOTPLUG=y 112CONFIG_HOTPLUG=y
63CONFIG_PRINTK=y 113CONFIG_PRINTK=y
64CONFIG_BUG=y 114CONFIG_BUG=y
65CONFIG_ELF_CORE=y 115CONFIG_ELF_CORE=y
116# CONFIG_COMPAT_BRK is not set
66CONFIG_BASE_FULL=y 117CONFIG_BASE_FULL=y
67CONFIG_FUTEX=y 118CONFIG_FUTEX=y
68CONFIG_ANON_INODES=y 119CONFIG_ANON_INODES=y
@@ -76,6 +127,17 @@ CONFIG_SLUB_DEBUG=y
76# CONFIG_SLAB is not set 127# CONFIG_SLAB is not set
77CONFIG_SLUB=y 128CONFIG_SLUB=y
78# CONFIG_SLOB is not set 129# CONFIG_SLOB is not set
130CONFIG_PROFILING=y
131CONFIG_MARKERS=y
132# CONFIG_OPROFILE is not set
133CONFIG_HAVE_OPROFILE=y
134CONFIG_KPROBES=y
135CONFIG_KRETPROBES=y
136CONFIG_HAVE_KPROBES=y
137CONFIG_HAVE_KRETPROBES=y
138# CONFIG_HAVE_DMA_ATTRS is not set
139CONFIG_PROC_PAGE_MONITOR=y
140CONFIG_SLABINFO=y
79CONFIG_RT_MUTEXES=y 141CONFIG_RT_MUTEXES=y
80# CONFIG_TINY_SHMEM is not set 142# CONFIG_TINY_SHMEM is not set
81CONFIG_BASE_SMALL=0 143CONFIG_BASE_SMALL=0
@@ -87,10 +149,10 @@ CONFIG_MODULE_FORCE_UNLOAD=y
87# CONFIG_KMOD is not set 149# CONFIG_KMOD is not set
88CONFIG_STOP_MACHINE=y 150CONFIG_STOP_MACHINE=y
89CONFIG_BLOCK=y 151CONFIG_BLOCK=y
90CONFIG_LBD=y 152# CONFIG_LBD is not set
91# CONFIG_BLK_DEV_IO_TRACE is not set 153CONFIG_BLK_DEV_IO_TRACE=y
92# CONFIG_LSF is not set 154# CONFIG_LSF is not set
93# CONFIG_BLK_DEV_BSG is not set 155CONFIG_BLK_DEV_BSG=y
94 156
95# 157#
96# IO Schedulers 158# IO Schedulers
@@ -103,7 +165,8 @@ CONFIG_IOSCHED_CFQ=y
103# CONFIG_DEFAULT_DEADLINE is not set 165# CONFIG_DEFAULT_DEADLINE is not set
104CONFIG_DEFAULT_CFQ=y 166CONFIG_DEFAULT_CFQ=y
105# CONFIG_DEFAULT_NOOP is not set 167# CONFIG_DEFAULT_NOOP is not set
106CONFIG_DEFAULT_IOSCHED="anticipatory" 168CONFIG_DEFAULT_IOSCHED="cfq"
169CONFIG_CLASSIC_RCU=y
107 170
108# 171#
109# Processor type and features 172# Processor type and features
@@ -111,18 +174,21 @@ CONFIG_DEFAULT_IOSCHED="anticipatory"
111CONFIG_TICK_ONESHOT=y 174CONFIG_TICK_ONESHOT=y
112CONFIG_NO_HZ=y 175CONFIG_NO_HZ=y
113CONFIG_HIGH_RES_TIMERS=y 176CONFIG_HIGH_RES_TIMERS=y
177CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
114CONFIG_SMP=y 178CONFIG_SMP=y
115# CONFIG_X86_PC is not set 179CONFIG_X86_PC=y
116# CONFIG_X86_ELAN is not set 180# CONFIG_X86_ELAN is not set
117# CONFIG_X86_VOYAGER is not set 181# CONFIG_X86_VOYAGER is not set
118# CONFIG_X86_NUMAQ is not set 182# CONFIG_X86_NUMAQ is not set
119# CONFIG_X86_SUMMIT is not set 183# CONFIG_X86_SUMMIT is not set
120# CONFIG_X86_BIGSMP is not set 184# CONFIG_X86_BIGSMP is not set
121# CONFIG_X86_VISWS is not set 185# CONFIG_X86_VISWS is not set
122CONFIG_X86_GENERICARCH=y 186# CONFIG_X86_GENERICARCH is not set
123# CONFIG_X86_ES7000 is not set 187# CONFIG_X86_ES7000 is not set
124# CONFIG_PARAVIRT is not set 188# CONFIG_X86_RDC321X is not set
125CONFIG_X86_CYCLONE_TIMER=y 189# CONFIG_X86_VSMP is not set
190CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y
191# CONFIG_PARAVIRT_GUEST is not set
126# CONFIG_M386 is not set 192# CONFIG_M386 is not set
127# CONFIG_M486 is not set 193# CONFIG_M486 is not set
128# CONFIG_M586 is not set 194# CONFIG_M586 is not set
@@ -130,9 +196,8 @@ CONFIG_X86_CYCLONE_TIMER=y
130# CONFIG_M586MMX is not set 196# CONFIG_M586MMX is not set
131# CONFIG_M686 is not set 197# CONFIG_M686 is not set
132# CONFIG_MPENTIUMII is not set 198# CONFIG_MPENTIUMII is not set
133CONFIG_MPENTIUMIII=y 199# CONFIG_MPENTIUMIII is not set
134# CONFIG_MPENTIUMM is not set 200# CONFIG_MPENTIUMM is not set
135# CONFIG_MCORE2 is not set
136# CONFIG_MPENTIUM4 is not set 201# CONFIG_MPENTIUM4 is not set
137# CONFIG_MK6 is not set 202# CONFIG_MK6 is not set
138# CONFIG_MK7 is not set 203# CONFIG_MK7 is not set
@@ -147,14 +212,14 @@ CONFIG_MPENTIUMIII=y
147# CONFIG_MCYRIXIII is not set 212# CONFIG_MCYRIXIII is not set
148# CONFIG_MVIAC3_2 is not set 213# CONFIG_MVIAC3_2 is not set
149# CONFIG_MVIAC7 is not set 214# CONFIG_MVIAC7 is not set
150CONFIG_X86_GENERIC=y 215# CONFIG_MPSC is not set
216CONFIG_MCORE2=y
217# CONFIG_GENERIC_CPU is not set
218# CONFIG_X86_GENERIC is not set
219CONFIG_X86_CPU=y
151CONFIG_X86_CMPXCHG=y 220CONFIG_X86_CMPXCHG=y
152CONFIG_X86_L1_CACHE_SHIFT=7 221CONFIG_X86_L1_CACHE_SHIFT=6
153CONFIG_X86_XADD=y 222CONFIG_X86_XADD=y
154CONFIG_RWSEM_XCHGADD_ALGORITHM=y
155# CONFIG_ARCH_HAS_ILOG2_U32 is not set
156# CONFIG_ARCH_HAS_ILOG2_U64 is not set
157CONFIG_GENERIC_CALIBRATE_DELAY=y
158CONFIG_X86_WP_WORKS_OK=y 223CONFIG_X86_WP_WORKS_OK=y
159CONFIG_X86_INVLPG=y 224CONFIG_X86_INVLPG=y
160CONFIG_X86_BSWAP=y 225CONFIG_X86_BSWAP=y
@@ -162,106 +227,120 @@ CONFIG_X86_POPAD_OK=y
162CONFIG_X86_GOOD_APIC=y 227CONFIG_X86_GOOD_APIC=y
163CONFIG_X86_INTEL_USERCOPY=y 228CONFIG_X86_INTEL_USERCOPY=y
164CONFIG_X86_USE_PPRO_CHECKSUM=y 229CONFIG_X86_USE_PPRO_CHECKSUM=y
230CONFIG_X86_P6_NOP=y
165CONFIG_X86_TSC=y 231CONFIG_X86_TSC=y
166CONFIG_X86_CMOV=y 232CONFIG_X86_MINIMUM_CPU_FAMILY=6
167CONFIG_X86_MINIMUM_CPU_FAMILY=4 233CONFIG_X86_DEBUGCTLMSR=y
168CONFIG_HPET_TIMER=y 234CONFIG_HPET_TIMER=y
169CONFIG_HPET_EMULATE_RTC=y 235CONFIG_HPET_EMULATE_RTC=y
170CONFIG_NR_CPUS=32 236CONFIG_DMI=y
171CONFIG_SCHED_SMT=y 237# CONFIG_IOMMU_HELPER is not set
238CONFIG_NR_CPUS=4
239# CONFIG_SCHED_SMT is not set
172CONFIG_SCHED_MC=y 240CONFIG_SCHED_MC=y
173# CONFIG_PREEMPT_NONE is not set 241# CONFIG_PREEMPT_NONE is not set
174CONFIG_PREEMPT_VOLUNTARY=y 242CONFIG_PREEMPT_VOLUNTARY=y
175# CONFIG_PREEMPT is not set 243# CONFIG_PREEMPT is not set
176CONFIG_PREEMPT_BKL=y
177CONFIG_X86_LOCAL_APIC=y 244CONFIG_X86_LOCAL_APIC=y
178CONFIG_X86_IO_APIC=y 245CONFIG_X86_IO_APIC=y
179CONFIG_X86_MCE=y 246# CONFIG_X86_MCE is not set
180CONFIG_X86_MCE_NONFATAL=y
181CONFIG_X86_MCE_P4THERMAL=y
182CONFIG_VM86=y 247CONFIG_VM86=y
183# CONFIG_TOSHIBA is not set 248# CONFIG_TOSHIBA is not set
184# CONFIG_I8K is not set 249# CONFIG_I8K is not set
185# CONFIG_X86_REBOOTFIXUPS is not set 250# CONFIG_X86_REBOOTFIXUPS is not set
186CONFIG_MICROCODE=y 251# CONFIG_MICROCODE is not set
187CONFIG_MICROCODE_OLD_INTERFACE=y
188CONFIG_X86_MSR=y 252CONFIG_X86_MSR=y
189CONFIG_X86_CPUID=y 253CONFIG_X86_CPUID=y
190
191#
192# Firmware Drivers
193#
194# CONFIG_EDD is not set
195# CONFIG_DELL_RBU is not set
196# CONFIG_DCDBAS is not set
197CONFIG_DMIID=y
198# CONFIG_NOHIGHMEM is not set 254# CONFIG_NOHIGHMEM is not set
199CONFIG_HIGHMEM4G=y 255CONFIG_HIGHMEM4G=y
200# CONFIG_HIGHMEM64G is not set 256# CONFIG_HIGHMEM64G is not set
201CONFIG_PAGE_OFFSET=0xC0000000 257CONFIG_PAGE_OFFSET=0xC0000000
202CONFIG_HIGHMEM=y 258CONFIG_HIGHMEM=y
203CONFIG_ARCH_POPULATES_NODE_MAP=y 259CONFIG_NEED_NODE_MEMMAP_SIZE=y
260CONFIG_ARCH_FLATMEM_ENABLE=y
261CONFIG_ARCH_SPARSEMEM_ENABLE=y
262CONFIG_ARCH_SELECT_MEMORY_MODEL=y
204CONFIG_SELECT_MEMORY_MODEL=y 263CONFIG_SELECT_MEMORY_MODEL=y
205CONFIG_FLATMEM_MANUAL=y 264# CONFIG_FLATMEM_MANUAL is not set
206# CONFIG_DISCONTIGMEM_MANUAL is not set 265# CONFIG_DISCONTIGMEM_MANUAL is not set
207# CONFIG_SPARSEMEM_MANUAL is not set 266CONFIG_SPARSEMEM_MANUAL=y
208CONFIG_FLATMEM=y 267CONFIG_SPARSEMEM=y
209CONFIG_FLAT_NODE_MEM_MAP=y 268CONFIG_HAVE_MEMORY_PRESENT=y
210# CONFIG_SPARSEMEM_STATIC is not set 269CONFIG_SPARSEMEM_STATIC=y
270# CONFIG_SPARSEMEM_VMEMMAP_ENABLE is not set
271
272#
273# Memory hotplug is currently incompatible with Software Suspend
274#
275CONFIG_PAGEFLAGS_EXTENDED=y
211CONFIG_SPLIT_PTLOCK_CPUS=4 276CONFIG_SPLIT_PTLOCK_CPUS=4
212CONFIG_RESOURCES_64BIT=y 277CONFIG_RESOURCES_64BIT=y
213CONFIG_ZONE_DMA_FLAG=1 278CONFIG_ZONE_DMA_FLAG=1
214CONFIG_BOUNCE=y 279CONFIG_BOUNCE=y
215CONFIG_NR_QUICK=1
216CONFIG_VIRT_TO_BUS=y 280CONFIG_VIRT_TO_BUS=y
217# CONFIG_HIGHPTE is not set 281# CONFIG_HIGHPTE is not set
218# CONFIG_MATH_EMULATION is not set 282# CONFIG_MATH_EMULATION is not set
219CONFIG_MTRR=y 283CONFIG_MTRR=y
220# CONFIG_EFI is not set 284# CONFIG_X86_PAT is not set
285CONFIG_EFI=y
221# CONFIG_IRQBALANCE is not set 286# CONFIG_IRQBALANCE is not set
222CONFIG_SECCOMP=y 287CONFIG_SECCOMP=y
223# CONFIG_HZ_100 is not set 288# CONFIG_HZ_100 is not set
224CONFIG_HZ_250=y 289# CONFIG_HZ_250 is not set
225# CONFIG_HZ_300 is not set 290# CONFIG_HZ_300 is not set
226# CONFIG_HZ_1000 is not set 291CONFIG_HZ_1000=y
227CONFIG_HZ=250 292CONFIG_HZ=1000
228# CONFIG_KEXEC is not set 293CONFIG_SCHED_HRTICK=y
229# CONFIG_CRASH_DUMP is not set 294CONFIG_KEXEC=y
230CONFIG_PHYSICAL_START=0x100000 295CONFIG_CRASH_DUMP=y
231# CONFIG_RELOCATABLE is not set 296CONFIG_PHYSICAL_START=0x1000000
232CONFIG_PHYSICAL_ALIGN=0x100000 297CONFIG_RELOCATABLE=y
233# CONFIG_HOTPLUG_CPU is not set 298CONFIG_PHYSICAL_ALIGN=0x200000
234CONFIG_COMPAT_VDSO=y 299CONFIG_HOTPLUG_CPU=y
300# CONFIG_COMPAT_VDSO is not set
235CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y 301CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y
236 302
237# 303#
238# Power management options (ACPI, APM) 304# Power management options
239# 305#
240CONFIG_PM=y 306CONFIG_PM=y
241CONFIG_PM_LEGACY=y 307CONFIG_PM_DEBUG=y
242# CONFIG_PM_DEBUG is not set 308# CONFIG_PM_VERBOSE is not set
243 309CONFIG_CAN_PM_TRACE=y
244# 310CONFIG_PM_TRACE=y
245# ACPI (Advanced Configuration and Power Interface) Support 311CONFIG_PM_TRACE_RTC=y
246# 312CONFIG_PM_SLEEP_SMP=y
313CONFIG_PM_SLEEP=y
314CONFIG_SUSPEND=y
315CONFIG_SUSPEND_FREEZER=y
316CONFIG_HIBERNATION=y
317CONFIG_PM_STD_PARTITION=""
247CONFIG_ACPI=y 318CONFIG_ACPI=y
319CONFIG_ACPI_SLEEP=y
248CONFIG_ACPI_PROCFS=y 320CONFIG_ACPI_PROCFS=y
321CONFIG_ACPI_PROCFS_POWER=y
322CONFIG_ACPI_SYSFS_POWER=y
323CONFIG_ACPI_PROC_EVENT=y
249CONFIG_ACPI_AC=y 324CONFIG_ACPI_AC=y
250CONFIG_ACPI_BATTERY=y 325CONFIG_ACPI_BATTERY=y
251CONFIG_ACPI_BUTTON=y 326CONFIG_ACPI_BUTTON=y
252CONFIG_ACPI_FAN=y 327CONFIG_ACPI_FAN=y
253# CONFIG_ACPI_DOCK is not set 328CONFIG_ACPI_DOCK=y
329# CONFIG_ACPI_BAY is not set
254CONFIG_ACPI_PROCESSOR=y 330CONFIG_ACPI_PROCESSOR=y
331CONFIG_ACPI_HOTPLUG_CPU=y
255CONFIG_ACPI_THERMAL=y 332CONFIG_ACPI_THERMAL=y
333# CONFIG_ACPI_WMI is not set
256# CONFIG_ACPI_ASUS is not set 334# CONFIG_ACPI_ASUS is not set
257# CONFIG_ACPI_TOSHIBA is not set 335# CONFIG_ACPI_TOSHIBA is not set
258CONFIG_ACPI_BLACKLIST_YEAR=2001 336# CONFIG_ACPI_CUSTOM_DSDT is not set
259CONFIG_ACPI_DEBUG=y 337CONFIG_ACPI_BLACKLIST_YEAR=0
338# CONFIG_ACPI_DEBUG is not set
260CONFIG_ACPI_EC=y 339CONFIG_ACPI_EC=y
261CONFIG_ACPI_POWER=y 340CONFIG_ACPI_POWER=y
262CONFIG_ACPI_SYSTEM=y 341CONFIG_ACPI_SYSTEM=y
263CONFIG_X86_PM_TIMER=y 342CONFIG_X86_PM_TIMER=y
264# CONFIG_ACPI_CONTAINER is not set 343CONFIG_ACPI_CONTAINER=y
265# CONFIG_ACPI_SBS is not set 344# CONFIG_ACPI_SBS is not set
266# CONFIG_APM is not set 345# CONFIG_APM is not set
267 346
@@ -271,15 +350,17 @@ CONFIG_X86_PM_TIMER=y
271CONFIG_CPU_FREQ=y 350CONFIG_CPU_FREQ=y
272CONFIG_CPU_FREQ_TABLE=y 351CONFIG_CPU_FREQ_TABLE=y
273CONFIG_CPU_FREQ_DEBUG=y 352CONFIG_CPU_FREQ_DEBUG=y
274CONFIG_CPU_FREQ_STAT=y 353# CONFIG_CPU_FREQ_STAT is not set
275# CONFIG_CPU_FREQ_STAT_DETAILS is not set 354# CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE is not set
276CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE=y 355# CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE is not set
277# CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE is not set 356CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE=y
357# CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND is not set
358# CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE is not set
278CONFIG_CPU_FREQ_GOV_PERFORMANCE=y 359CONFIG_CPU_FREQ_GOV_PERFORMANCE=y
279# CONFIG_CPU_FREQ_GOV_POWERSAVE is not set 360# CONFIG_CPU_FREQ_GOV_POWERSAVE is not set
280CONFIG_CPU_FREQ_GOV_USERSPACE=y 361CONFIG_CPU_FREQ_GOV_USERSPACE=y
281CONFIG_CPU_FREQ_GOV_ONDEMAND=y 362CONFIG_CPU_FREQ_GOV_ONDEMAND=y
282CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y 363# CONFIG_CPU_FREQ_GOV_CONSERVATIVE is not set
283 364
284# 365#
285# CPUFreq processor drivers 366# CPUFreq processor drivers
@@ -287,8 +368,7 @@ CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y
287CONFIG_X86_ACPI_CPUFREQ=y 368CONFIG_X86_ACPI_CPUFREQ=y
288# CONFIG_X86_POWERNOW_K6 is not set 369# CONFIG_X86_POWERNOW_K6 is not set
289# CONFIG_X86_POWERNOW_K7 is not set 370# CONFIG_X86_POWERNOW_K7 is not set
290CONFIG_X86_POWERNOW_K8=y 371# CONFIG_X86_POWERNOW_K8 is not set
291CONFIG_X86_POWERNOW_K8_ACPI=y
292# CONFIG_X86_GX_SUSPMOD is not set 372# CONFIG_X86_GX_SUSPMOD is not set
293# CONFIG_X86_SPEEDSTEP_CENTRINO is not set 373# CONFIG_X86_SPEEDSTEP_CENTRINO is not set
294# CONFIG_X86_SPEEDSTEP_ICH is not set 374# CONFIG_X86_SPEEDSTEP_ICH is not set
@@ -302,43 +382,72 @@ CONFIG_X86_POWERNOW_K8_ACPI=y
302# 382#
303# shared options 383# shared options
304# 384#
305CONFIG_X86_ACPI_CPUFREQ_PROC_INTF=y 385# CONFIG_X86_ACPI_CPUFREQ_PROC_INTF is not set
306# CONFIG_X86_SPEEDSTEP_LIB is not set 386# CONFIG_X86_SPEEDSTEP_LIB is not set
387CONFIG_CPU_IDLE=y
388CONFIG_CPU_IDLE_GOV_LADDER=y
389CONFIG_CPU_IDLE_GOV_MENU=y
307 390
308# 391#
309# Bus options (PCI, PCMCIA, EISA, MCA, ISA) 392# Bus options (PCI etc.)
310# 393#
311CONFIG_PCI=y 394CONFIG_PCI=y
312# CONFIG_PCI_GOBIOS is not set 395# CONFIG_PCI_GOBIOS is not set
313# CONFIG_PCI_GOMMCONFIG is not set 396# CONFIG_PCI_GOMMCONFIG is not set
314# CONFIG_PCI_GODIRECT is not set 397# CONFIG_PCI_GODIRECT is not set
315CONFIG_PCI_GOANY=y 398CONFIG_PCI_GOANY=y
399# CONFIG_PCI_GOOLPC is not set
316CONFIG_PCI_BIOS=y 400CONFIG_PCI_BIOS=y
317CONFIG_PCI_DIRECT=y 401CONFIG_PCI_DIRECT=y
318CONFIG_PCI_MMCONFIG=y 402CONFIG_PCI_MMCONFIG=y
319# CONFIG_PCIEPORTBUS is not set 403CONFIG_PCI_DOMAINS=y
404CONFIG_PCIEPORTBUS=y
405# CONFIG_HOTPLUG_PCI_PCIE is not set
406CONFIG_PCIEAER=y
407# CONFIG_PCIEASPM is not set
320CONFIG_ARCH_SUPPORTS_MSI=y 408CONFIG_ARCH_SUPPORTS_MSI=y
321CONFIG_PCI_MSI=y 409CONFIG_PCI_MSI=y
410# CONFIG_PCI_LEGACY is not set
322# CONFIG_PCI_DEBUG is not set 411# CONFIG_PCI_DEBUG is not set
323# CONFIG_HT_IRQ is not set 412CONFIG_HT_IRQ=y
324CONFIG_ISA_DMA_API=y 413CONFIG_ISA_DMA_API=y
325# CONFIG_ISA is not set 414# CONFIG_ISA is not set
326# CONFIG_MCA is not set 415# CONFIG_MCA is not set
327# CONFIG_SCx200 is not set 416# CONFIG_SCx200 is not set
417# CONFIG_OLPC is not set
328CONFIG_K8_NB=y 418CONFIG_K8_NB=y
329 419CONFIG_PCCARD=y
330# 420# CONFIG_PCMCIA_DEBUG is not set
331# PCCARD (PCMCIA/CardBus) support 421CONFIG_PCMCIA=y
332# 422CONFIG_PCMCIA_LOAD_CIS=y
333# CONFIG_PCCARD is not set 423CONFIG_PCMCIA_IOCTL=y
334# CONFIG_HOTPLUG_PCI is not set 424CONFIG_CARDBUS=y
335 425
336# 426#
337# Executable file formats 427# PC-card bridges
428#
429CONFIG_YENTA=y
430CONFIG_YENTA_O2=y
431CONFIG_YENTA_RICOH=y
432CONFIG_YENTA_TI=y
433CONFIG_YENTA_ENE_TUNE=y
434CONFIG_YENTA_TOSHIBA=y
435# CONFIG_PD6729 is not set
436# CONFIG_I82092 is not set
437CONFIG_PCCARD_NONSTATIC=y
438CONFIG_HOTPLUG_PCI=y
439# CONFIG_HOTPLUG_PCI_FAKE is not set
440# CONFIG_HOTPLUG_PCI_IBM is not set
441# CONFIG_HOTPLUG_PCI_ACPI is not set
442# CONFIG_HOTPLUG_PCI_CPCI is not set
443# CONFIG_HOTPLUG_PCI_SHPC is not set
444
445#
446# Executable file formats / Emulations
338# 447#
339CONFIG_BINFMT_ELF=y 448CONFIG_BINFMT_ELF=y
340# CONFIG_BINFMT_AOUT is not set 449# CONFIG_BINFMT_AOUT is not set
341# CONFIG_BINFMT_MISC is not set 450CONFIG_BINFMT_MISC=y
342 451
343# 452#
344# Networking 453# Networking
@@ -349,59 +458,142 @@ CONFIG_NET=y
349# Networking options 458# Networking options
350# 459#
351CONFIG_PACKET=y 460CONFIG_PACKET=y
352# CONFIG_PACKET_MMAP is not set 461CONFIG_PACKET_MMAP=y
353CONFIG_UNIX=y 462CONFIG_UNIX=y
354CONFIG_XFRM=y 463CONFIG_XFRM=y
355# CONFIG_XFRM_USER is not set 464CONFIG_XFRM_USER=y
356# CONFIG_XFRM_SUB_POLICY is not set 465# CONFIG_XFRM_SUB_POLICY is not set
357# CONFIG_XFRM_MIGRATE is not set 466# CONFIG_XFRM_MIGRATE is not set
467# CONFIG_XFRM_STATISTICS is not set
358# CONFIG_NET_KEY is not set 468# CONFIG_NET_KEY is not set
359CONFIG_INET=y 469CONFIG_INET=y
360CONFIG_IP_MULTICAST=y 470CONFIG_IP_MULTICAST=y
361# CONFIG_IP_ADVANCED_ROUTER is not set 471CONFIG_IP_ADVANCED_ROUTER=y
472CONFIG_ASK_IP_FIB_HASH=y
473# CONFIG_IP_FIB_TRIE is not set
362CONFIG_IP_FIB_HASH=y 474CONFIG_IP_FIB_HASH=y
363CONFIG_IP_PNP=y 475CONFIG_IP_MULTIPLE_TABLES=y
364CONFIG_IP_PNP_DHCP=y 476CONFIG_IP_ROUTE_MULTIPATH=y
365# CONFIG_IP_PNP_BOOTP is not set 477CONFIG_IP_ROUTE_VERBOSE=y
366# CONFIG_IP_PNP_RARP is not set 478# CONFIG_IP_PNP is not set
367# CONFIG_NET_IPIP is not set 479# CONFIG_NET_IPIP is not set
368# CONFIG_NET_IPGRE is not set 480# CONFIG_NET_IPGRE is not set
369# CONFIG_IP_MROUTE is not set 481CONFIG_IP_MROUTE=y
482CONFIG_IP_PIMSM_V1=y
483CONFIG_IP_PIMSM_V2=y
370# CONFIG_ARPD is not set 484# CONFIG_ARPD is not set
371# CONFIG_SYN_COOKIES is not set 485CONFIG_SYN_COOKIES=y
372# CONFIG_INET_AH is not set 486# CONFIG_INET_AH is not set
373# CONFIG_INET_ESP is not set 487# CONFIG_INET_ESP is not set
374# CONFIG_INET_IPCOMP is not set 488# CONFIG_INET_IPCOMP is not set
375# CONFIG_INET_XFRM_TUNNEL is not set 489# CONFIG_INET_XFRM_TUNNEL is not set
376CONFIG_INET_TUNNEL=y 490CONFIG_INET_TUNNEL=y
377CONFIG_INET_XFRM_MODE_TRANSPORT=y 491# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
378CONFIG_INET_XFRM_MODE_TUNNEL=y 492# CONFIG_INET_XFRM_MODE_TUNNEL is not set
379# CONFIG_INET_XFRM_MODE_BEET is not set 493# CONFIG_INET_XFRM_MODE_BEET is not set
380CONFIG_INET_DIAG=y 494CONFIG_INET_LRO=y
381CONFIG_INET_TCP_DIAG=y 495# CONFIG_INET_DIAG is not set
382# CONFIG_TCP_CONG_ADVANCED is not set 496CONFIG_TCP_CONG_ADVANCED=y
497# CONFIG_TCP_CONG_BIC is not set
383CONFIG_TCP_CONG_CUBIC=y 498CONFIG_TCP_CONG_CUBIC=y
499# CONFIG_TCP_CONG_WESTWOOD is not set
500# CONFIG_TCP_CONG_HTCP is not set
501# CONFIG_TCP_CONG_HSTCP is not set
502# CONFIG_TCP_CONG_HYBLA is not set
503# CONFIG_TCP_CONG_VEGAS is not set
504# CONFIG_TCP_CONG_SCALABLE is not set
505# CONFIG_TCP_CONG_LP is not set
506# CONFIG_TCP_CONG_VENO is not set
507# CONFIG_TCP_CONG_YEAH is not set
508# CONFIG_TCP_CONG_ILLINOIS is not set
509# CONFIG_DEFAULT_BIC is not set
510CONFIG_DEFAULT_CUBIC=y
511# CONFIG_DEFAULT_HTCP is not set
512# CONFIG_DEFAULT_VEGAS is not set
513# CONFIG_DEFAULT_WESTWOOD is not set
514# CONFIG_DEFAULT_RENO is not set
384CONFIG_DEFAULT_TCP_CONG="cubic" 515CONFIG_DEFAULT_TCP_CONG="cubic"
385# CONFIG_TCP_MD5SIG is not set 516CONFIG_TCP_MD5SIG=y
517# CONFIG_IP_VS is not set
386CONFIG_IPV6=y 518CONFIG_IPV6=y
387# CONFIG_IPV6_PRIVACY is not set 519# CONFIG_IPV6_PRIVACY is not set
388# CONFIG_IPV6_ROUTER_PREF is not set 520# CONFIG_IPV6_ROUTER_PREF is not set
389# CONFIG_IPV6_OPTIMISTIC_DAD is not set 521# CONFIG_IPV6_OPTIMISTIC_DAD is not set
390# CONFIG_INET6_AH is not set 522CONFIG_INET6_AH=y
391# CONFIG_INET6_ESP is not set 523CONFIG_INET6_ESP=y
392# CONFIG_INET6_IPCOMP is not set 524# CONFIG_INET6_IPCOMP is not set
393# CONFIG_IPV6_MIP6 is not set 525# CONFIG_IPV6_MIP6 is not set
394# CONFIG_INET6_XFRM_TUNNEL is not set 526# CONFIG_INET6_XFRM_TUNNEL is not set
395# CONFIG_INET6_TUNNEL is not set 527# CONFIG_INET6_TUNNEL is not set
396CONFIG_INET6_XFRM_MODE_TRANSPORT=y 528CONFIG_INET6_XFRM_MODE_TRANSPORT=y
397CONFIG_INET6_XFRM_MODE_TUNNEL=y 529CONFIG_INET6_XFRM_MODE_TUNNEL=y
398# CONFIG_INET6_XFRM_MODE_BEET is not set 530CONFIG_INET6_XFRM_MODE_BEET=y
399# CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION is not set 531# CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION is not set
400CONFIG_IPV6_SIT=y 532CONFIG_IPV6_SIT=y
533CONFIG_IPV6_NDISC_NODETYPE=y
401# CONFIG_IPV6_TUNNEL is not set 534# CONFIG_IPV6_TUNNEL is not set
402# CONFIG_IPV6_MULTIPLE_TABLES is not set 535# CONFIG_IPV6_MULTIPLE_TABLES is not set
403# CONFIG_NETWORK_SECMARK is not set 536# CONFIG_IPV6_MROUTE is not set
404# CONFIG_NETFILTER is not set 537CONFIG_NETLABEL=y
538CONFIG_NETWORK_SECMARK=y
539CONFIG_NETFILTER=y
540# CONFIG_NETFILTER_DEBUG is not set
541# CONFIG_NETFILTER_ADVANCED is not set
542
543#
544# Core Netfilter Configuration
545#
546CONFIG_NETFILTER_NETLINK=y
547CONFIG_NETFILTER_NETLINK_LOG=y
548CONFIG_NF_CONNTRACK=y
549CONFIG_NF_CONNTRACK_SECMARK=y
550CONFIG_NF_CONNTRACK_FTP=y
551CONFIG_NF_CONNTRACK_IRC=y
552CONFIG_NF_CONNTRACK_SIP=y
553CONFIG_NF_CT_NETLINK=y
554CONFIG_NETFILTER_XTABLES=y
555CONFIG_NETFILTER_XT_TARGET_MARK=y
556CONFIG_NETFILTER_XT_TARGET_NFLOG=y
557CONFIG_NETFILTER_XT_TARGET_SECMARK=y
558CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=y
559CONFIG_NETFILTER_XT_TARGET_TCPMSS=y
560CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y
561CONFIG_NETFILTER_XT_MATCH_MARK=y
562CONFIG_NETFILTER_XT_MATCH_POLICY=y
563CONFIG_NETFILTER_XT_MATCH_STATE=y
564
565#
566# IP: Netfilter Configuration
567#
568CONFIG_NF_CONNTRACK_IPV4=y
569CONFIG_NF_CONNTRACK_PROC_COMPAT=y
570CONFIG_IP_NF_IPTABLES=y
571CONFIG_IP_NF_FILTER=y
572CONFIG_IP_NF_TARGET_REJECT=y
573CONFIG_IP_NF_TARGET_LOG=y
574CONFIG_IP_NF_TARGET_ULOG=y
575CONFIG_NF_NAT=y
576CONFIG_NF_NAT_NEEDED=y
577CONFIG_IP_NF_TARGET_MASQUERADE=y
578CONFIG_NF_NAT_FTP=y
579CONFIG_NF_NAT_IRC=y
580# CONFIG_NF_NAT_TFTP is not set
581# CONFIG_NF_NAT_AMANDA is not set
582# CONFIG_NF_NAT_PPTP is not set
583# CONFIG_NF_NAT_H323 is not set
584CONFIG_NF_NAT_SIP=y
585CONFIG_IP_NF_MANGLE=y
586
587#
588# IPv6: Netfilter Configuration
589#
590CONFIG_NF_CONNTRACK_IPV6=y
591CONFIG_IP6_NF_IPTABLES=y
592CONFIG_IP6_NF_MATCH_IPV6HEADER=y
593CONFIG_IP6_NF_FILTER=y
594CONFIG_IP6_NF_TARGET_LOG=y
595CONFIG_IP6_NF_TARGET_REJECT=y
596CONFIG_IP6_NF_MANGLE=y
405# CONFIG_IP_DCCP is not set 597# CONFIG_IP_DCCP is not set
406# CONFIG_IP_SCTP is not set 598# CONFIG_IP_SCTP is not set
407# CONFIG_TIPC is not set 599# CONFIG_TIPC is not set
@@ -409,6 +601,7 @@ CONFIG_IPV6_SIT=y
409# CONFIG_BRIDGE is not set 601# CONFIG_BRIDGE is not set
410# CONFIG_VLAN_8021Q is not set 602# CONFIG_VLAN_8021Q is not set
411# CONFIG_DECNET is not set 603# CONFIG_DECNET is not set
604CONFIG_LLC=y
412# CONFIG_LLC2 is not set 605# CONFIG_LLC2 is not set
413# CONFIG_IPX is not set 606# CONFIG_IPX is not set
414# CONFIG_ATALK is not set 607# CONFIG_ATALK is not set
@@ -416,28 +609,99 @@ CONFIG_IPV6_SIT=y
416# CONFIG_LAPB is not set 609# CONFIG_LAPB is not set
417# CONFIG_ECONET is not set 610# CONFIG_ECONET is not set
418# CONFIG_WAN_ROUTER is not set 611# CONFIG_WAN_ROUTER is not set
419 612CONFIG_NET_SCHED=y
420# 613
421# QoS and/or fair queueing 614#
422# 615# Queueing/Scheduling
423# CONFIG_NET_SCHED is not set 616#
617# CONFIG_NET_SCH_CBQ is not set
618# CONFIG_NET_SCH_HTB is not set
619# CONFIG_NET_SCH_HFSC is not set
620# CONFIG_NET_SCH_PRIO is not set
621# CONFIG_NET_SCH_RR is not set
622# CONFIG_NET_SCH_RED is not set
623# CONFIG_NET_SCH_SFQ is not set
624# CONFIG_NET_SCH_TEQL is not set
625# CONFIG_NET_SCH_TBF is not set
626# CONFIG_NET_SCH_GRED is not set
627# CONFIG_NET_SCH_DSMARK is not set
628# CONFIG_NET_SCH_NETEM is not set
629# CONFIG_NET_SCH_INGRESS is not set
630
631#
632# Classification
633#
634CONFIG_NET_CLS=y
635# CONFIG_NET_CLS_BASIC is not set
636# CONFIG_NET_CLS_TCINDEX is not set
637# CONFIG_NET_CLS_ROUTE4 is not set
638# CONFIG_NET_CLS_FW is not set
639# CONFIG_NET_CLS_U32 is not set
640# CONFIG_NET_CLS_RSVP is not set
641# CONFIG_NET_CLS_RSVP6 is not set
642# CONFIG_NET_CLS_FLOW is not set
643CONFIG_NET_EMATCH=y
644CONFIG_NET_EMATCH_STACK=32
645# CONFIG_NET_EMATCH_CMP is not set
646# CONFIG_NET_EMATCH_NBYTE is not set
647# CONFIG_NET_EMATCH_U32 is not set
648# CONFIG_NET_EMATCH_META is not set
649# CONFIG_NET_EMATCH_TEXT is not set
650CONFIG_NET_CLS_ACT=y
651# CONFIG_NET_ACT_POLICE is not set
652# CONFIG_NET_ACT_GACT is not set
653# CONFIG_NET_ACT_MIRRED is not set
654# CONFIG_NET_ACT_IPT is not set
655# CONFIG_NET_ACT_NAT is not set
656# CONFIG_NET_ACT_PEDIT is not set
657# CONFIG_NET_ACT_SIMP is not set
658CONFIG_NET_SCH_FIFO=y
424 659
425# 660#
426# Network testing 661# Network testing
427# 662#
428# CONFIG_NET_PKTGEN is not set 663# CONFIG_NET_PKTGEN is not set
429# CONFIG_NET_TCPPROBE is not set 664# CONFIG_NET_TCPPROBE is not set
430# CONFIG_HAMRADIO is not set 665CONFIG_HAMRADIO=y
666
667#
668# Packet Radio protocols
669#
670# CONFIG_AX25 is not set
671# CONFIG_CAN is not set
431# CONFIG_IRDA is not set 672# CONFIG_IRDA is not set
432# CONFIG_BT is not set 673# CONFIG_BT is not set
433# CONFIG_AF_RXRPC is not set 674# CONFIG_AF_RXRPC is not set
675CONFIG_FIB_RULES=y
434 676
435# 677#
436# Wireless 678# Wireless
437# 679#
438# CONFIG_CFG80211 is not set 680CONFIG_CFG80211=y
439# CONFIG_WIRELESS_EXT is not set 681CONFIG_NL80211=y
440# CONFIG_MAC80211 is not set 682CONFIG_WIRELESS_EXT=y
683CONFIG_MAC80211=y
684
685#
686# Rate control algorithm selection
687#
688CONFIG_MAC80211_RC_DEFAULT_PID=y
689# CONFIG_MAC80211_RC_DEFAULT_NONE is not set
690
691#
692# Selecting 'y' for an algorithm will
693#
694
695#
696# build the algorithm into mac80211.
697#
698CONFIG_MAC80211_RC_DEFAULT="pid"
699CONFIG_MAC80211_RC_PID=y
700# CONFIG_MAC80211_MESH is not set
701CONFIG_MAC80211_LEDS=y
702# CONFIG_MAC80211_DEBUGFS is not set
703# CONFIG_MAC80211_DEBUG_PACKET_ALIGNMENT is not set
704# CONFIG_MAC80211_DEBUG is not set
441# CONFIG_IEEE80211 is not set 705# CONFIG_IEEE80211 is not set
442# CONFIG_RFKILL is not set 706# CONFIG_RFKILL is not set
443# CONFIG_NET_9P is not set 707# CONFIG_NET_9P is not set
@@ -449,13 +713,15 @@ CONFIG_IPV6_SIT=y
449# 713#
450# Generic Driver Options 714# Generic Driver Options
451# 715#
716CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
452CONFIG_STANDALONE=y 717CONFIG_STANDALONE=y
453CONFIG_PREVENT_FIRMWARE_BUILD=y 718CONFIG_PREVENT_FIRMWARE_BUILD=y
454CONFIG_FW_LOADER=y 719CONFIG_FW_LOADER=y
455# CONFIG_DEBUG_DRIVER is not set 720# CONFIG_DEBUG_DRIVER is not set
456# CONFIG_DEBUG_DEVRES is not set 721CONFIG_DEBUG_DEVRES=y
457# CONFIG_SYS_HYPERVISOR is not set 722# CONFIG_SYS_HYPERVISOR is not set
458# CONFIG_CONNECTOR is not set 723CONFIG_CONNECTOR=y
724CONFIG_PROC_EVENTS=y
459# CONFIG_MTD is not set 725# CONFIG_MTD is not set
460# CONFIG_PARPORT is not set 726# CONFIG_PARPORT is not set
461CONFIG_PNP=y 727CONFIG_PNP=y
@@ -466,7 +732,7 @@ CONFIG_PNP=y
466# 732#
467CONFIG_PNPACPI=y 733CONFIG_PNPACPI=y
468CONFIG_BLK_DEV=y 734CONFIG_BLK_DEV=y
469CONFIG_BLK_DEV_FD=y 735# CONFIG_BLK_DEV_FD is not set
470# CONFIG_BLK_CPQ_DA is not set 736# CONFIG_BLK_CPQ_DA is not set
471# CONFIG_BLK_CPQ_CISS_DA is not set 737# CONFIG_BLK_CPQ_CISS_DA is not set
472# CONFIG_BLK_DEV_DAC960 is not set 738# CONFIG_BLK_DEV_DAC960 is not set
@@ -479,8 +745,8 @@ CONFIG_BLK_DEV_LOOP=y
479# CONFIG_BLK_DEV_UB is not set 745# CONFIG_BLK_DEV_UB is not set
480CONFIG_BLK_DEV_RAM=y 746CONFIG_BLK_DEV_RAM=y
481CONFIG_BLK_DEV_RAM_COUNT=16 747CONFIG_BLK_DEV_RAM_COUNT=16
482CONFIG_BLK_DEV_RAM_SIZE=4096 748CONFIG_BLK_DEV_RAM_SIZE=16384
483CONFIG_BLK_DEV_RAM_BLOCKSIZE=1024 749# CONFIG_BLK_DEV_XIP is not set
484# CONFIG_CDROM_PKTCDVD is not set 750# CONFIG_CDROM_PKTCDVD is not set
485# CONFIG_ATA_OVER_ETH is not set 751# CONFIG_ATA_OVER_ETH is not set
486CONFIG_MISC_DEVICES=y 752CONFIG_MISC_DEVICES=y
@@ -489,73 +755,17 @@ CONFIG_MISC_DEVICES=y
489# CONFIG_EEPROM_93CX6 is not set 755# CONFIG_EEPROM_93CX6 is not set
490# CONFIG_SGI_IOC4 is not set 756# CONFIG_SGI_IOC4 is not set
491# CONFIG_TIFM_CORE is not set 757# CONFIG_TIFM_CORE is not set
758# CONFIG_ACER_WMI is not set
759# CONFIG_ASUS_LAPTOP is not set
760# CONFIG_FUJITSU_LAPTOP is not set
761# CONFIG_TC1100_WMI is not set
762# CONFIG_MSI_LAPTOP is not set
492# CONFIG_SONY_LAPTOP is not set 763# CONFIG_SONY_LAPTOP is not set
493# CONFIG_THINKPAD_ACPI is not set 764# CONFIG_THINKPAD_ACPI is not set
494CONFIG_IDE=y 765# CONFIG_INTEL_MENLOW is not set
495CONFIG_BLK_DEV_IDE=y 766# CONFIG_ENCLOSURE_SERVICES is not set
496 767CONFIG_HAVE_IDE=y
497# 768# CONFIG_IDE is not set
498# Please see Documentation/ide.txt for help/info on IDE drives
499#
500# CONFIG_BLK_DEV_IDE_SATA is not set
501# CONFIG_BLK_DEV_HD_IDE is not set
502CONFIG_BLK_DEV_IDEDISK=y
503CONFIG_IDEDISK_MULTI_MODE=y
504CONFIG_BLK_DEV_IDECD=y
505# CONFIG_BLK_DEV_IDETAPE is not set
506# CONFIG_BLK_DEV_IDEFLOPPY is not set
507# CONFIG_BLK_DEV_IDESCSI is not set
508CONFIG_BLK_DEV_IDEACPI=y
509# CONFIG_IDE_TASK_IOCTL is not set
510CONFIG_IDE_PROC_FS=y
511
512#
513# IDE chipset support/bugfixes
514#
515CONFIG_IDE_GENERIC=y
516# CONFIG_BLK_DEV_CMD640 is not set
517# CONFIG_BLK_DEV_IDEPNP is not set
518CONFIG_BLK_DEV_IDEPCI=y
519# CONFIG_IDEPCI_SHARE_IRQ is not set
520CONFIG_IDEPCI_PCIBUS_ORDER=y
521# CONFIG_BLK_DEV_OFFBOARD is not set
522# CONFIG_BLK_DEV_GENERIC is not set
523# CONFIG_BLK_DEV_OPTI621 is not set
524# CONFIG_BLK_DEV_RZ1000 is not set
525CONFIG_BLK_DEV_IDEDMA_PCI=y
526# CONFIG_BLK_DEV_IDEDMA_FORCED is not set
527# CONFIG_IDEDMA_ONLYDISK is not set
528# CONFIG_BLK_DEV_AEC62XX is not set
529# CONFIG_BLK_DEV_ALI15X3 is not set
530CONFIG_BLK_DEV_AMD74XX=y
531# CONFIG_BLK_DEV_ATIIXP is not set
532# CONFIG_BLK_DEV_CMD64X is not set
533# CONFIG_BLK_DEV_TRIFLEX is not set
534# CONFIG_BLK_DEV_CY82C693 is not set
535# CONFIG_BLK_DEV_CS5520 is not set
536# CONFIG_BLK_DEV_CS5530 is not set
537# CONFIG_BLK_DEV_CS5535 is not set
538# CONFIG_BLK_DEV_HPT34X is not set
539# CONFIG_BLK_DEV_HPT366 is not set
540# CONFIG_BLK_DEV_JMICRON is not set
541# CONFIG_BLK_DEV_SC1200 is not set
542CONFIG_BLK_DEV_PIIX=y
543# CONFIG_BLK_DEV_IT8213 is not set
544# CONFIG_BLK_DEV_IT821X is not set
545# CONFIG_BLK_DEV_NS87415 is not set
546# CONFIG_BLK_DEV_PDC202XX_OLD is not set
547# CONFIG_BLK_DEV_PDC202XX_NEW is not set
548# CONFIG_BLK_DEV_SVWKS is not set
549# CONFIG_BLK_DEV_SIIMAGE is not set
550# CONFIG_BLK_DEV_SIS5513 is not set
551# CONFIG_BLK_DEV_SLC90E66 is not set
552# CONFIG_BLK_DEV_TRM290 is not set
553# CONFIG_BLK_DEV_VIA82CXXX is not set
554# CONFIG_BLK_DEV_TC86C001 is not set
555# CONFIG_IDE_ARM is not set
556CONFIG_BLK_DEV_IDEDMA=y
557# CONFIG_IDEDMA_IVB is not set
558# CONFIG_BLK_DEV_HD is not set
559 769
560# 770#
561# SCSI device support 771# SCSI device support
@@ -564,8 +774,8 @@ CONFIG_BLK_DEV_IDEDMA=y
564CONFIG_SCSI=y 774CONFIG_SCSI=y
565CONFIG_SCSI_DMA=y 775CONFIG_SCSI_DMA=y
566# CONFIG_SCSI_TGT is not set 776# CONFIG_SCSI_TGT is not set
567CONFIG_SCSI_NETLINK=y 777# CONFIG_SCSI_NETLINK is not set
568# CONFIG_SCSI_PROC_FS is not set 778CONFIG_SCSI_PROC_FS=y
569 779
570# 780#
571# SCSI support type (disk, tape, CD-ROM) 781# SCSI support type (disk, tape, CD-ROM)
@@ -574,7 +784,7 @@ CONFIG_BLK_DEV_SD=y
574# CONFIG_CHR_DEV_ST is not set 784# CONFIG_CHR_DEV_ST is not set
575# CONFIG_CHR_DEV_OSST is not set 785# CONFIG_CHR_DEV_OSST is not set
576CONFIG_BLK_DEV_SR=y 786CONFIG_BLK_DEV_SR=y
577# CONFIG_BLK_DEV_SR_VENDOR is not set 787CONFIG_BLK_DEV_SR_VENDOR=y
578CONFIG_CHR_DEV_SG=y 788CONFIG_CHR_DEV_SG=y
579# CONFIG_CHR_DEV_SCH is not set 789# CONFIG_CHR_DEV_SCH is not set
580 790
@@ -582,7 +792,7 @@ CONFIG_CHR_DEV_SG=y
582# Some SCSI devices (e.g. CD jukebox) support multiple LUNs 792# Some SCSI devices (e.g. CD jukebox) support multiple LUNs
583# 793#
584# CONFIG_SCSI_MULTI_LUN is not set 794# CONFIG_SCSI_MULTI_LUN is not set
585# CONFIG_SCSI_CONSTANTS is not set 795CONFIG_SCSI_CONSTANTS=y
586# CONFIG_SCSI_LOGGING is not set 796# CONFIG_SCSI_LOGGING is not set
587# CONFIG_SCSI_SCAN_ASYNC is not set 797# CONFIG_SCSI_SCAN_ASYNC is not set
588CONFIG_SCSI_WAIT_SCAN=m 798CONFIG_SCSI_WAIT_SCAN=m
@@ -591,81 +801,37 @@ CONFIG_SCSI_WAIT_SCAN=m
591# SCSI Transports 801# SCSI Transports
592# 802#
593CONFIG_SCSI_SPI_ATTRS=y 803CONFIG_SCSI_SPI_ATTRS=y
594CONFIG_SCSI_FC_ATTRS=y 804# CONFIG_SCSI_FC_ATTRS is not set
595# CONFIG_SCSI_ISCSI_ATTRS is not set 805# CONFIG_SCSI_ISCSI_ATTRS is not set
596# CONFIG_SCSI_SAS_ATTRS is not set 806# CONFIG_SCSI_SAS_ATTRS is not set
597# CONFIG_SCSI_SAS_LIBSAS is not set 807# CONFIG_SCSI_SAS_LIBSAS is not set
598 808# CONFIG_SCSI_SRP_ATTRS is not set
599# 809# CONFIG_SCSI_LOWLEVEL is not set
600# SCSI low-level drivers 810# CONFIG_SCSI_LOWLEVEL_PCMCIA is not set
601#
602# CONFIG_ISCSI_TCP is not set
603CONFIG_BLK_DEV_3W_XXXX_RAID=y
604# CONFIG_SCSI_3W_9XXX is not set
605# CONFIG_SCSI_ACARD is not set
606# CONFIG_SCSI_AACRAID is not set
607CONFIG_SCSI_AIC7XXX=y
608CONFIG_AIC7XXX_CMDS_PER_DEVICE=32
609CONFIG_AIC7XXX_RESET_DELAY_MS=5000
610CONFIG_AIC7XXX_DEBUG_ENABLE=y
611CONFIG_AIC7XXX_DEBUG_MASK=0
612CONFIG_AIC7XXX_REG_PRETTY_PRINT=y
613# CONFIG_SCSI_AIC7XXX_OLD is not set
614CONFIG_SCSI_AIC79XX=y
615CONFIG_AIC79XX_CMDS_PER_DEVICE=32
616CONFIG_AIC79XX_RESET_DELAY_MS=4000
617# CONFIG_AIC79XX_DEBUG_ENABLE is not set
618CONFIG_AIC79XX_DEBUG_MASK=0
619# CONFIG_AIC79XX_REG_PRETTY_PRINT is not set
620# CONFIG_SCSI_AIC94XX is not set
621# CONFIG_SCSI_DPT_I2O is not set
622# CONFIG_SCSI_ADVANSYS is not set
623# CONFIG_SCSI_ARCMSR is not set
624# CONFIG_MEGARAID_NEWGEN is not set
625# CONFIG_MEGARAID_LEGACY is not set
626# CONFIG_MEGARAID_SAS is not set
627# CONFIG_SCSI_HPTIOP is not set
628# CONFIG_SCSI_BUSLOGIC is not set
629# CONFIG_SCSI_DMX3191D is not set
630# CONFIG_SCSI_EATA is not set
631# CONFIG_SCSI_FUTURE_DOMAIN is not set
632# CONFIG_SCSI_GDTH is not set
633# CONFIG_SCSI_IPS is not set
634# CONFIG_SCSI_INITIO is not set
635# CONFIG_SCSI_INIA100 is not set
636# CONFIG_SCSI_STEX is not set
637# CONFIG_SCSI_SYM53C8XX_2 is not set
638# CONFIG_SCSI_IPR is not set
639# CONFIG_SCSI_QLOGIC_1280 is not set
640# CONFIG_SCSI_QLA_FC is not set
641# CONFIG_SCSI_QLA_ISCSI is not set
642# CONFIG_SCSI_LPFC is not set
643# CONFIG_SCSI_DC395x is not set
644# CONFIG_SCSI_DC390T is not set
645# CONFIG_SCSI_NSP32 is not set
646# CONFIG_SCSI_DEBUG is not set
647# CONFIG_SCSI_SRP is not set
648CONFIG_ATA=y 811CONFIG_ATA=y
649# CONFIG_ATA_NONSTANDARD is not set 812# CONFIG_ATA_NONSTANDARD is not set
650CONFIG_ATA_ACPI=y 813CONFIG_ATA_ACPI=y
814CONFIG_SATA_PMP=y
651CONFIG_SATA_AHCI=y 815CONFIG_SATA_AHCI=y
652CONFIG_SATA_SVW=y 816# CONFIG_SATA_SIL24 is not set
817CONFIG_ATA_SFF=y
818# CONFIG_SATA_SVW is not set
653CONFIG_ATA_PIIX=y 819CONFIG_ATA_PIIX=y
654# CONFIG_SATA_MV is not set 820# CONFIG_SATA_MV is not set
655CONFIG_SATA_NV=y 821# CONFIG_SATA_NV is not set
656# CONFIG_PDC_ADMA is not set 822# CONFIG_PDC_ADMA is not set
657# CONFIG_SATA_QSTOR is not set 823# CONFIG_SATA_QSTOR is not set
658# CONFIG_SATA_PROMISE is not set 824# CONFIG_SATA_PROMISE is not set
659# CONFIG_SATA_SX4 is not set 825# CONFIG_SATA_SX4 is not set
660CONFIG_SATA_SIL=y 826# CONFIG_SATA_SIL is not set
661# CONFIG_SATA_SIL24 is not set
662# CONFIG_SATA_SIS is not set 827# CONFIG_SATA_SIS is not set
663# CONFIG_SATA_ULI is not set 828# CONFIG_SATA_ULI is not set
664CONFIG_SATA_VIA=y 829# CONFIG_SATA_VIA is not set
665# CONFIG_SATA_VITESSE is not set 830# CONFIG_SATA_VITESSE is not set
666# CONFIG_SATA_INIC162X is not set 831# CONFIG_SATA_INIC162X is not set
832# CONFIG_PATA_ACPI is not set
667# CONFIG_PATA_ALI is not set 833# CONFIG_PATA_ALI is not set
668# CONFIG_PATA_AMD is not set 834CONFIG_PATA_AMD=y
669# CONFIG_PATA_ARTOP is not set 835# CONFIG_PATA_ARTOP is not set
670# CONFIG_PATA_ATIIXP is not set 836# CONFIG_PATA_ATIIXP is not set
671# CONFIG_PATA_CMD640_PCI is not set 837# CONFIG_PATA_CMD640_PCI is not set
@@ -673,6 +839,7 @@ CONFIG_SATA_VIA=y
673# CONFIG_PATA_CS5520 is not set 839# CONFIG_PATA_CS5520 is not set
674# CONFIG_PATA_CS5530 is not set 840# CONFIG_PATA_CS5530 is not set
675# CONFIG_PATA_CS5535 is not set 841# CONFIG_PATA_CS5535 is not set
842# CONFIG_PATA_CS5536 is not set
676# CONFIG_PATA_CYPRESS is not set 843# CONFIG_PATA_CYPRESS is not set
677# CONFIG_PATA_EFAR is not set 844# CONFIG_PATA_EFAR is not set
678# CONFIG_ATA_GENERIC is not set 845# CONFIG_ATA_GENERIC is not set
@@ -686,11 +853,14 @@ CONFIG_SATA_VIA=y
686# CONFIG_PATA_TRIFLEX is not set 853# CONFIG_PATA_TRIFLEX is not set
687# CONFIG_PATA_MARVELL is not set 854# CONFIG_PATA_MARVELL is not set
688# CONFIG_PATA_MPIIX is not set 855# CONFIG_PATA_MPIIX is not set
689# CONFIG_PATA_OLDPIIX is not set 856CONFIG_PATA_OLDPIIX=y
690# CONFIG_PATA_NETCELL is not set 857# CONFIG_PATA_NETCELL is not set
858# CONFIG_PATA_NINJA32 is not set
691# CONFIG_PATA_NS87410 is not set 859# CONFIG_PATA_NS87410 is not set
860# CONFIG_PATA_NS87415 is not set
692# CONFIG_PATA_OPTI is not set 861# CONFIG_PATA_OPTI is not set
693# CONFIG_PATA_OPTIDMA is not set 862# CONFIG_PATA_OPTIDMA is not set
863# CONFIG_PATA_PCMCIA is not set
694# CONFIG_PATA_PDC_OLD is not set 864# CONFIG_PATA_PDC_OLD is not set
695# CONFIG_PATA_RADISYS is not set 865# CONFIG_PATA_RADISYS is not set
696# CONFIG_PATA_RZ1000 is not set 866# CONFIG_PATA_RZ1000 is not set
@@ -702,65 +872,42 @@ CONFIG_SATA_VIA=y
702# CONFIG_PATA_VIA is not set 872# CONFIG_PATA_VIA is not set
703# CONFIG_PATA_WINBOND is not set 873# CONFIG_PATA_WINBOND is not set
704CONFIG_MD=y 874CONFIG_MD=y
705# CONFIG_BLK_DEV_MD is not set 875CONFIG_BLK_DEV_MD=y
876# CONFIG_MD_LINEAR is not set
877# CONFIG_MD_RAID0 is not set
878# CONFIG_MD_RAID1 is not set
879# CONFIG_MD_RAID10 is not set
880# CONFIG_MD_RAID456 is not set
881# CONFIG_MD_MULTIPATH is not set
882# CONFIG_MD_FAULTY is not set
706CONFIG_BLK_DEV_DM=y 883CONFIG_BLK_DEV_DM=y
707# CONFIG_DM_DEBUG is not set 884# CONFIG_DM_DEBUG is not set
708# CONFIG_DM_CRYPT is not set 885# CONFIG_DM_CRYPT is not set
709# CONFIG_DM_SNAPSHOT is not set 886# CONFIG_DM_SNAPSHOT is not set
710# CONFIG_DM_MIRROR is not set 887CONFIG_DM_MIRROR=y
711# CONFIG_DM_ZERO is not set 888CONFIG_DM_ZERO=y
712# CONFIG_DM_MULTIPATH is not set 889# CONFIG_DM_MULTIPATH is not set
713# CONFIG_DM_DELAY is not set 890# CONFIG_DM_DELAY is not set
714 891# CONFIG_DM_UEVENT is not set
715# 892# CONFIG_FUSION is not set
716# Fusion MPT device support
717#
718CONFIG_FUSION=y
719CONFIG_FUSION_SPI=y
720# CONFIG_FUSION_FC is not set
721# CONFIG_FUSION_SAS is not set
722CONFIG_FUSION_MAX_SGE=128
723# CONFIG_FUSION_CTL is not set
724 893
725# 894#
726# IEEE 1394 (FireWire) support 895# IEEE 1394 (FireWire) support
727# 896#
728# CONFIG_FIREWIRE is not set 897# CONFIG_FIREWIRE is not set
729CONFIG_IEEE1394=y 898# CONFIG_IEEE1394 is not set
730
731#
732# Subsystem Options
733#
734# CONFIG_IEEE1394_VERBOSEDEBUG is not set
735
736#
737# Controllers
738#
739
740#
741# Texas Instruments PCILynx requires I2C
742#
743CONFIG_IEEE1394_OHCI1394=y
744
745#
746# Protocols
747#
748# CONFIG_IEEE1394_VIDEO1394 is not set
749# CONFIG_IEEE1394_SBP2 is not set
750# CONFIG_IEEE1394_ETH1394_ROM_ENTRY is not set
751# CONFIG_IEEE1394_ETH1394 is not set
752# CONFIG_IEEE1394_DV1394 is not set
753CONFIG_IEEE1394_RAWIO=y
754# CONFIG_I2O is not set 899# CONFIG_I2O is not set
755CONFIG_MACINTOSH_DRIVERS=y 900CONFIG_MACINTOSH_DRIVERS=y
756# CONFIG_MAC_EMUMOUSEBTN is not set 901CONFIG_MAC_EMUMOUSEBTN=y
757CONFIG_NETDEVICES=y 902CONFIG_NETDEVICES=y
758CONFIG_NETDEVICES_MULTIQUEUE=y 903# CONFIG_NETDEVICES_MULTIQUEUE is not set
904# CONFIG_IFB is not set
759# CONFIG_DUMMY is not set 905# CONFIG_DUMMY is not set
760# CONFIG_BONDING is not set 906# CONFIG_BONDING is not set
761# CONFIG_MACVLAN is not set 907# CONFIG_MACVLAN is not set
762# CONFIG_EQUALIZER is not set 908# CONFIG_EQUALIZER is not set
763# CONFIG_TUN is not set 909# CONFIG_TUN is not set
910# CONFIG_VETH is not set
764# CONFIG_NET_SB1000 is not set 911# CONFIG_NET_SB1000 is not set
765# CONFIG_ARCNET is not set 912# CONFIG_ARCNET is not set
766# CONFIG_PHYLIB is not set 913# CONFIG_PHYLIB is not set
@@ -770,38 +917,40 @@ CONFIG_MII=y
770# CONFIG_SUNGEM is not set 917# CONFIG_SUNGEM is not set
771# CONFIG_CASSINI is not set 918# CONFIG_CASSINI is not set
772CONFIG_NET_VENDOR_3COM=y 919CONFIG_NET_VENDOR_3COM=y
773CONFIG_VORTEX=y 920# CONFIG_VORTEX is not set
774# CONFIG_TYPHOON is not set 921# CONFIG_TYPHOON is not set
775CONFIG_NET_TULIP=y 922CONFIG_NET_TULIP=y
776# CONFIG_DE2104X is not set 923# CONFIG_DE2104X is not set
777CONFIG_TULIP=y 924# CONFIG_TULIP is not set
778# CONFIG_TULIP_MWI is not set
779# CONFIG_TULIP_MMIO is not set
780# CONFIG_TULIP_NAPI is not set
781# CONFIG_DE4X5 is not set 925# CONFIG_DE4X5 is not set
782# CONFIG_WINBOND_840 is not set 926# CONFIG_WINBOND_840 is not set
783# CONFIG_DM9102 is not set 927# CONFIG_DM9102 is not set
784# CONFIG_ULI526X is not set 928# CONFIG_ULI526X is not set
929# CONFIG_PCMCIA_XIRCOM is not set
785# CONFIG_HP100 is not set 930# CONFIG_HP100 is not set
931# CONFIG_IBM_NEW_EMAC_ZMII is not set
932# CONFIG_IBM_NEW_EMAC_RGMII is not set
933# CONFIG_IBM_NEW_EMAC_TAH is not set
934# CONFIG_IBM_NEW_EMAC_EMAC4 is not set
786CONFIG_NET_PCI=y 935CONFIG_NET_PCI=y
787# CONFIG_PCNET32 is not set 936# CONFIG_PCNET32 is not set
788# CONFIG_AMD8111_ETH is not set 937# CONFIG_AMD8111_ETH is not set
789# CONFIG_ADAPTEC_STARFIRE is not set 938# CONFIG_ADAPTEC_STARFIRE is not set
790CONFIG_B44=y 939# CONFIG_B44 is not set
791CONFIG_FORCEDETH=y 940CONFIG_FORCEDETH=y
792# CONFIG_FORCEDETH_NAPI is not set 941# CONFIG_FORCEDETH_NAPI is not set
793# CONFIG_DGRS is not set
794# CONFIG_EEPRO100 is not set 942# CONFIG_EEPRO100 is not set
795CONFIG_E100=y 943CONFIG_E100=y
796# CONFIG_FEALNX is not set 944# CONFIG_FEALNX is not set
797# CONFIG_NATSEMI is not set 945# CONFIG_NATSEMI is not set
798# CONFIG_NE2K_PCI is not set 946# CONFIG_NE2K_PCI is not set
799CONFIG_8139CP=y 947# CONFIG_8139CP is not set
800CONFIG_8139TOO=y 948CONFIG_8139TOO=y
801# CONFIG_8139TOO_PIO is not set 949CONFIG_8139TOO_PIO=y
802# CONFIG_8139TOO_TUNE_TWISTER is not set 950# CONFIG_8139TOO_TUNE_TWISTER is not set
803# CONFIG_8139TOO_8129 is not set 951# CONFIG_8139TOO_8129 is not set
804# CONFIG_8139_OLD_RX_RESET is not set 952# CONFIG_8139_OLD_RX_RESET is not set
953# CONFIG_R6040 is not set
805# CONFIG_SIS900 is not set 954# CONFIG_SIS900 is not set
806# CONFIG_EPIC100 is not set 955# CONFIG_EPIC100 is not set
807# CONFIG_SUNDANCE is not set 956# CONFIG_SUNDANCE is not set
@@ -814,34 +963,75 @@ CONFIG_NETDEV_1000=y
814CONFIG_E1000=y 963CONFIG_E1000=y
815# CONFIG_E1000_NAPI is not set 964# CONFIG_E1000_NAPI is not set
816# CONFIG_E1000_DISABLE_PACKET_SPLIT is not set 965# CONFIG_E1000_DISABLE_PACKET_SPLIT is not set
966# CONFIG_E1000E is not set
967# CONFIG_E1000E_ENABLED is not set
968# CONFIG_IP1000 is not set
969# CONFIG_IGB is not set
817# CONFIG_NS83820 is not set 970# CONFIG_NS83820 is not set
818# CONFIG_HAMACHI is not set 971# CONFIG_HAMACHI is not set
819# CONFIG_YELLOWFIN is not set 972# CONFIG_YELLOWFIN is not set
820CONFIG_R8169=y 973# CONFIG_R8169 is not set
821# CONFIG_R8169_NAPI is not set
822# CONFIG_SIS190 is not set 974# CONFIG_SIS190 is not set
823# CONFIG_SKGE is not set 975# CONFIG_SKGE is not set
824CONFIG_SKY2=y 976CONFIG_SKY2=y
977# CONFIG_SKY2_DEBUG is not set
825# CONFIG_VIA_VELOCITY is not set 978# CONFIG_VIA_VELOCITY is not set
826CONFIG_TIGON3=y 979CONFIG_TIGON3=y
827CONFIG_BNX2=y 980# CONFIG_BNX2 is not set
828# CONFIG_QLA3XXX is not set 981# CONFIG_QLA3XXX is not set
829# CONFIG_ATL1 is not set 982# CONFIG_ATL1 is not set
830CONFIG_NETDEV_10000=y 983CONFIG_NETDEV_10000=y
831# CONFIG_CHELSIO_T1 is not set 984# CONFIG_CHELSIO_T1 is not set
832# CONFIG_CHELSIO_T3 is not set 985# CONFIG_CHELSIO_T3 is not set
986# CONFIG_IXGBE is not set
833# CONFIG_IXGB is not set 987# CONFIG_IXGB is not set
834# CONFIG_S2IO is not set 988# CONFIG_S2IO is not set
835# CONFIG_MYRI10GE is not set 989# CONFIG_MYRI10GE is not set
836# CONFIG_NETXEN_NIC is not set 990# CONFIG_NETXEN_NIC is not set
991# CONFIG_NIU is not set
837# CONFIG_MLX4_CORE is not set 992# CONFIG_MLX4_CORE is not set
838# CONFIG_TR is not set 993# CONFIG_TEHUTI is not set
994# CONFIG_BNX2X is not set
995# CONFIG_SFC is not set
996CONFIG_TR=y
997# CONFIG_IBMOL is not set
998# CONFIG_IBMLS is not set
999# CONFIG_3C359 is not set
1000# CONFIG_TMS380TR is not set
839 1001
840# 1002#
841# Wireless LAN 1003# Wireless LAN
842# 1004#
843# CONFIG_WLAN_PRE80211 is not set 1005# CONFIG_WLAN_PRE80211 is not set
844# CONFIG_WLAN_80211 is not set 1006CONFIG_WLAN_80211=y
1007# CONFIG_PCMCIA_RAYCS is not set
1008# CONFIG_IPW2100 is not set
1009# CONFIG_IPW2200 is not set
1010# CONFIG_LIBERTAS is not set
1011# CONFIG_AIRO is not set
1012# CONFIG_HERMES is not set
1013# CONFIG_ATMEL is not set
1014# CONFIG_AIRO_CS is not set
1015# CONFIG_PCMCIA_WL3501 is not set
1016# CONFIG_PRISM54 is not set
1017# CONFIG_USB_ZD1201 is not set
1018# CONFIG_USB_NET_RNDIS_WLAN is not set
1019# CONFIG_RTL8180 is not set
1020# CONFIG_RTL8187 is not set
1021# CONFIG_ADM8211 is not set
1022# CONFIG_P54_COMMON is not set
1023CONFIG_ATH5K=y
1024# CONFIG_ATH5K_DEBUG is not set
1025# CONFIG_IWLWIFI is not set
1026# CONFIG_IWLCORE is not set
1027# CONFIG_IWLWIFI_LEDS is not set
1028# CONFIG_IWL4965 is not set
1029# CONFIG_IWL3945 is not set
1030# CONFIG_HOSTAP is not set
1031# CONFIG_B43 is not set
1032# CONFIG_B43LEGACY is not set
1033# CONFIG_ZD1211RW is not set
1034# CONFIG_RT2X00 is not set
845 1035
846# 1036#
847# USB Network Adapters 1037# USB Network Adapters
@@ -850,16 +1040,27 @@ CONFIG_NETDEV_10000=y
850# CONFIG_USB_KAWETH is not set 1040# CONFIG_USB_KAWETH is not set
851# CONFIG_USB_PEGASUS is not set 1041# CONFIG_USB_PEGASUS is not set
852# CONFIG_USB_RTL8150 is not set 1042# CONFIG_USB_RTL8150 is not set
853# CONFIG_USB_USBNET_MII is not set
854# CONFIG_USB_USBNET is not set 1043# CONFIG_USB_USBNET is not set
1044CONFIG_NET_PCMCIA=y
1045# CONFIG_PCMCIA_3C589 is not set
1046# CONFIG_PCMCIA_3C574 is not set
1047# CONFIG_PCMCIA_FMVJ18X is not set
1048# CONFIG_PCMCIA_PCNET is not set
1049# CONFIG_PCMCIA_NMCLAN is not set
1050# CONFIG_PCMCIA_SMC91C92 is not set
1051# CONFIG_PCMCIA_XIRC2PS is not set
1052# CONFIG_PCMCIA_AXNET is not set
1053# CONFIG_PCMCIA_IBMTR is not set
855# CONFIG_WAN is not set 1054# CONFIG_WAN is not set
856# CONFIG_FDDI is not set 1055CONFIG_FDDI=y
1056# CONFIG_DEFXX is not set
1057# CONFIG_SKFP is not set
857# CONFIG_HIPPI is not set 1058# CONFIG_HIPPI is not set
858# CONFIG_PPP is not set 1059# CONFIG_PPP is not set
859# CONFIG_SLIP is not set 1060# CONFIG_SLIP is not set
860# CONFIG_NET_FC is not set 1061# CONFIG_NET_FC is not set
861# CONFIG_SHAPER is not set
862CONFIG_NETCONSOLE=y 1062CONFIG_NETCONSOLE=y
1063# CONFIG_NETCONSOLE_DYNAMIC is not set
863CONFIG_NETPOLL=y 1064CONFIG_NETPOLL=y
864# CONFIG_NETPOLL_TRAP is not set 1065# CONFIG_NETPOLL_TRAP is not set
865CONFIG_NET_POLL_CONTROLLER=y 1066CONFIG_NET_POLL_CONTROLLER=y
@@ -870,18 +1071,17 @@ CONFIG_NET_POLL_CONTROLLER=y
870# Input device support 1071# Input device support
871# 1072#
872CONFIG_INPUT=y 1073CONFIG_INPUT=y
873# CONFIG_INPUT_FF_MEMLESS is not set 1074CONFIG_INPUT_FF_MEMLESS=y
874# CONFIG_INPUT_POLLDEV is not set 1075CONFIG_INPUT_POLLDEV=y
875 1076
876# 1077#
877# Userland interfaces 1078# Userland interfaces
878# 1079#
879CONFIG_INPUT_MOUSEDEV=y 1080CONFIG_INPUT_MOUSEDEV=y
880CONFIG_INPUT_MOUSEDEV_PSAUX=y 1081# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
881CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024 1082CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024
882CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 1083CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768
883# CONFIG_INPUT_JOYDEV is not set 1084# CONFIG_INPUT_JOYDEV is not set
884# CONFIG_INPUT_TSDEV is not set
885CONFIG_INPUT_EVDEV=y 1085CONFIG_INPUT_EVDEV=y
886# CONFIG_INPUT_EVBUG is not set 1086# CONFIG_INPUT_EVBUG is not set
887 1087
@@ -906,17 +1106,63 @@ CONFIG_MOUSE_PS2_TRACKPOINT=y
906# CONFIG_MOUSE_SERIAL is not set 1106# CONFIG_MOUSE_SERIAL is not set
907# CONFIG_MOUSE_APPLETOUCH is not set 1107# CONFIG_MOUSE_APPLETOUCH is not set
908# CONFIG_MOUSE_VSXXXAA is not set 1108# CONFIG_MOUSE_VSXXXAA is not set
909# CONFIG_INPUT_JOYSTICK is not set 1109CONFIG_INPUT_JOYSTICK=y
910# CONFIG_INPUT_TABLET is not set 1110# CONFIG_JOYSTICK_ANALOG is not set
911# CONFIG_INPUT_TOUCHSCREEN is not set 1111# CONFIG_JOYSTICK_A3D is not set
912# CONFIG_INPUT_MISC is not set 1112# CONFIG_JOYSTICK_ADI is not set
1113# CONFIG_JOYSTICK_COBRA is not set
1114# CONFIG_JOYSTICK_GF2K is not set
1115# CONFIG_JOYSTICK_GRIP is not set
1116# CONFIG_JOYSTICK_GRIP_MP is not set
1117# CONFIG_JOYSTICK_GUILLEMOT is not set
1118# CONFIG_JOYSTICK_INTERACT is not set
1119# CONFIG_JOYSTICK_SIDEWINDER is not set
1120# CONFIG_JOYSTICK_TMDC is not set
1121# CONFIG_JOYSTICK_IFORCE is not set
1122# CONFIG_JOYSTICK_WARRIOR is not set
1123# CONFIG_JOYSTICK_MAGELLAN is not set
1124# CONFIG_JOYSTICK_SPACEORB is not set
1125# CONFIG_JOYSTICK_SPACEBALL is not set
1126# CONFIG_JOYSTICK_STINGER is not set
1127# CONFIG_JOYSTICK_TWIDJOY is not set
1128# CONFIG_JOYSTICK_ZHENHUA is not set
1129# CONFIG_JOYSTICK_JOYDUMP is not set
1130# CONFIG_JOYSTICK_XPAD is not set
1131CONFIG_INPUT_TABLET=y
1132# CONFIG_TABLET_USB_ACECAD is not set
1133# CONFIG_TABLET_USB_AIPTEK is not set
1134# CONFIG_TABLET_USB_GTCO is not set
1135# CONFIG_TABLET_USB_KBTAB is not set
1136# CONFIG_TABLET_USB_WACOM is not set
1137CONFIG_INPUT_TOUCHSCREEN=y
1138# CONFIG_TOUCHSCREEN_FUJITSU is not set
1139# CONFIG_TOUCHSCREEN_GUNZE is not set
1140# CONFIG_TOUCHSCREEN_ELO is not set
1141# CONFIG_TOUCHSCREEN_MTOUCH is not set
1142# CONFIG_TOUCHSCREEN_MK712 is not set
1143# CONFIG_TOUCHSCREEN_PENMOUNT is not set
1144# CONFIG_TOUCHSCREEN_TOUCHRIGHT is not set
1145# CONFIG_TOUCHSCREEN_TOUCHWIN is not set
1146# CONFIG_TOUCHSCREEN_UCB1400 is not set
1147# CONFIG_TOUCHSCREEN_USB_COMPOSITE is not set
1148CONFIG_INPUT_MISC=y
1149# CONFIG_INPUT_PCSPKR is not set
1150# CONFIG_INPUT_APANEL is not set
1151# CONFIG_INPUT_WISTRON_BTNS is not set
1152# CONFIG_INPUT_ATLAS_BTNS is not set
1153# CONFIG_INPUT_ATI_REMOTE is not set
1154# CONFIG_INPUT_ATI_REMOTE2 is not set
1155# CONFIG_INPUT_KEYSPAN_REMOTE is not set
1156# CONFIG_INPUT_POWERMATE is not set
1157# CONFIG_INPUT_YEALINK is not set
1158# CONFIG_INPUT_UINPUT is not set
913 1159
914# 1160#
915# Hardware I/O ports 1161# Hardware I/O ports
916# 1162#
917CONFIG_SERIO=y 1163CONFIG_SERIO=y
918CONFIG_SERIO_I8042=y 1164CONFIG_SERIO_I8042=y
919# CONFIG_SERIO_SERPORT is not set 1165CONFIG_SERIO_SERPORT=y
920# CONFIG_SERIO_CT82C710 is not set 1166# CONFIG_SERIO_CT82C710 is not set
921# CONFIG_SERIO_PCIPS2 is not set 1167# CONFIG_SERIO_PCIPS2 is not set
922CONFIG_SERIO_LIBPS2=y 1168CONFIG_SERIO_LIBPS2=y
@@ -929,8 +1175,26 @@ CONFIG_SERIO_LIBPS2=y
929CONFIG_VT=y 1175CONFIG_VT=y
930CONFIG_VT_CONSOLE=y 1176CONFIG_VT_CONSOLE=y
931CONFIG_HW_CONSOLE=y 1177CONFIG_HW_CONSOLE=y
932# CONFIG_VT_HW_CONSOLE_BINDING is not set 1178CONFIG_VT_HW_CONSOLE_BINDING=y
933# CONFIG_SERIAL_NONSTANDARD is not set 1179CONFIG_DEVKMEM=y
1180CONFIG_SERIAL_NONSTANDARD=y
1181# CONFIG_COMPUTONE is not set
1182# CONFIG_ROCKETPORT is not set
1183# CONFIG_CYCLADES is not set
1184# CONFIG_DIGIEPCA is not set
1185# CONFIG_MOXA_INTELLIO is not set
1186# CONFIG_MOXA_SMARTIO is not set
1187# CONFIG_ISI is not set
1188# CONFIG_SYNCLINK is not set
1189# CONFIG_SYNCLINKMP is not set
1190# CONFIG_SYNCLINK_GT is not set
1191# CONFIG_N_HDLC is not set
1192# CONFIG_RISCOM8 is not set
1193# CONFIG_SPECIALIX is not set
1194# CONFIG_SX is not set
1195# CONFIG_RIO is not set
1196# CONFIG_STALDRV is not set
1197# CONFIG_NOZOMI is not set
934 1198
935# 1199#
936# Serial drivers 1200# Serial drivers
@@ -940,9 +1204,14 @@ CONFIG_SERIAL_8250_CONSOLE=y
940CONFIG_FIX_EARLYCON_MEM=y 1204CONFIG_FIX_EARLYCON_MEM=y
941CONFIG_SERIAL_8250_PCI=y 1205CONFIG_SERIAL_8250_PCI=y
942CONFIG_SERIAL_8250_PNP=y 1206CONFIG_SERIAL_8250_PNP=y
943CONFIG_SERIAL_8250_NR_UARTS=4 1207# CONFIG_SERIAL_8250_CS is not set
1208CONFIG_SERIAL_8250_NR_UARTS=32
944CONFIG_SERIAL_8250_RUNTIME_UARTS=4 1209CONFIG_SERIAL_8250_RUNTIME_UARTS=4
945# CONFIG_SERIAL_8250_EXTENDED is not set 1210CONFIG_SERIAL_8250_EXTENDED=y
1211CONFIG_SERIAL_8250_MANY_PORTS=y
1212CONFIG_SERIAL_8250_SHARE_IRQ=y
1213CONFIG_SERIAL_8250_DETECT_IRQ=y
1214CONFIG_SERIAL_8250_RSA=y
946 1215
947# 1216#
948# Non-8250 serial port support 1217# Non-8250 serial port support
@@ -951,89 +1220,275 @@ CONFIG_SERIAL_CORE=y
951CONFIG_SERIAL_CORE_CONSOLE=y 1220CONFIG_SERIAL_CORE_CONSOLE=y
952# CONFIG_SERIAL_JSM is not set 1221# CONFIG_SERIAL_JSM is not set
953CONFIG_UNIX98_PTYS=y 1222CONFIG_UNIX98_PTYS=y
954CONFIG_LEGACY_PTYS=y 1223# CONFIG_LEGACY_PTYS is not set
955CONFIG_LEGACY_PTY_COUNT=256
956# CONFIG_IPMI_HANDLER is not set 1224# CONFIG_IPMI_HANDLER is not set
957# CONFIG_WATCHDOG is not set
958CONFIG_HW_RANDOM=y 1225CONFIG_HW_RANDOM=y
959CONFIG_HW_RANDOM_INTEL=y 1226# CONFIG_HW_RANDOM_INTEL is not set
960CONFIG_HW_RANDOM_AMD=y 1227# CONFIG_HW_RANDOM_AMD is not set
961CONFIG_HW_RANDOM_GEODE=y 1228CONFIG_HW_RANDOM_GEODE=y
962CONFIG_HW_RANDOM_VIA=y 1229CONFIG_HW_RANDOM_VIA=y
963# CONFIG_NVRAM is not set 1230CONFIG_NVRAM=y
964CONFIG_RTC=y
965# CONFIG_R3964 is not set 1231# CONFIG_R3964 is not set
966# CONFIG_APPLICOM is not set 1232# CONFIG_APPLICOM is not set
967# CONFIG_SONYPI is not set 1233# CONFIG_SONYPI is not set
968CONFIG_AGP=y 1234
969# CONFIG_AGP_ALI is not set 1235#
970# CONFIG_AGP_ATI is not set 1236# PCMCIA character devices
971# CONFIG_AGP_AMD is not set 1237#
972CONFIG_AGP_AMD64=y 1238# CONFIG_SYNCLINK_CS is not set
973CONFIG_AGP_INTEL=y 1239# CONFIG_CARDMAN_4000 is not set
974# CONFIG_AGP_NVIDIA is not set 1240# CONFIG_CARDMAN_4040 is not set
975# CONFIG_AGP_SIS is not set 1241# CONFIG_IPWIRELESS is not set
976# CONFIG_AGP_SWORKS is not set
977# CONFIG_AGP_VIA is not set
978# CONFIG_AGP_EFFICEON is not set
979# CONFIG_DRM is not set
980# CONFIG_MWAVE is not set 1242# CONFIG_MWAVE is not set
981# CONFIG_PC8736x_GPIO is not set 1243# CONFIG_PC8736x_GPIO is not set
982# CONFIG_NSC_GPIO is not set 1244# CONFIG_NSC_GPIO is not set
983# CONFIG_CS5535_GPIO is not set 1245# CONFIG_CS5535_GPIO is not set
984CONFIG_RAW_DRIVER=y 1246# CONFIG_RAW_DRIVER is not set
985CONFIG_MAX_RAW_DEVS=256
986CONFIG_HPET=y 1247CONFIG_HPET=y
987# CONFIG_HPET_RTC_IRQ is not set 1248# CONFIG_HPET_RTC_IRQ is not set
988CONFIG_HPET_MMAP=y 1249# CONFIG_HPET_MMAP is not set
989# CONFIG_HANGCHECK_TIMER is not set 1250# CONFIG_HANGCHECK_TIMER is not set
990# CONFIG_TCG_TPM is not set 1251# CONFIG_TCG_TPM is not set
991# CONFIG_TELCLOCK is not set 1252# CONFIG_TELCLOCK is not set
992CONFIG_DEVPORT=y 1253CONFIG_DEVPORT=y
993# CONFIG_I2C is not set 1254CONFIG_I2C=y
994 1255CONFIG_I2C_BOARDINFO=y
995# 1256# CONFIG_I2C_CHARDEV is not set
996# SPI support 1257
997# 1258#
1259# I2C Hardware Bus support
1260#
1261# CONFIG_I2C_ALI1535 is not set
1262# CONFIG_I2C_ALI1563 is not set
1263# CONFIG_I2C_ALI15X3 is not set
1264# CONFIG_I2C_AMD756 is not set
1265# CONFIG_I2C_AMD8111 is not set
1266CONFIG_I2C_I801=y
1267# CONFIG_I2C_I810 is not set
1268# CONFIG_I2C_PIIX4 is not set
1269# CONFIG_I2C_NFORCE2 is not set
1270# CONFIG_I2C_OCORES is not set
1271# CONFIG_I2C_PARPORT_LIGHT is not set
1272# CONFIG_I2C_PROSAVAGE is not set
1273# CONFIG_I2C_SAVAGE4 is not set
1274# CONFIG_I2C_SIMTEC is not set
1275# CONFIG_SCx200_ACB is not set
1276# CONFIG_I2C_SIS5595 is not set
1277# CONFIG_I2C_SIS630 is not set
1278# CONFIG_I2C_SIS96X is not set
1279# CONFIG_I2C_TAOS_EVM is not set
1280# CONFIG_I2C_STUB is not set
1281# CONFIG_I2C_TINY_USB is not set
1282# CONFIG_I2C_VIA is not set
1283# CONFIG_I2C_VIAPRO is not set
1284# CONFIG_I2C_VOODOO3 is not set
1285# CONFIG_I2C_PCA_PLATFORM is not set
1286
1287#
1288# Miscellaneous I2C Chip support
1289#
1290# CONFIG_DS1682 is not set
1291# CONFIG_SENSORS_EEPROM is not set
1292# CONFIG_SENSORS_PCF8574 is not set
1293# CONFIG_PCF8575 is not set
1294# CONFIG_SENSORS_PCF8591 is not set
1295# CONFIG_SENSORS_MAX6875 is not set
1296# CONFIG_SENSORS_TSL2550 is not set
1297# CONFIG_I2C_DEBUG_CORE is not set
1298# CONFIG_I2C_DEBUG_ALGO is not set
1299# CONFIG_I2C_DEBUG_BUS is not set
1300# CONFIG_I2C_DEBUG_CHIP is not set
998# CONFIG_SPI is not set 1301# CONFIG_SPI is not set
999# CONFIG_SPI_MASTER is not set
1000# CONFIG_W1 is not set 1302# CONFIG_W1 is not set
1001# CONFIG_POWER_SUPPLY is not set 1303CONFIG_POWER_SUPPLY=y
1304# CONFIG_POWER_SUPPLY_DEBUG is not set
1305# CONFIG_PDA_POWER is not set
1306# CONFIG_BATTERY_DS2760 is not set
1002# CONFIG_HWMON is not set 1307# CONFIG_HWMON is not set
1308CONFIG_THERMAL=y
1309CONFIG_WATCHDOG=y
1310# CONFIG_WATCHDOG_NOWAYOUT is not set
1311
1312#
1313# Watchdog Device Drivers
1314#
1315# CONFIG_SOFT_WATCHDOG is not set
1316# CONFIG_ACQUIRE_WDT is not set
1317# CONFIG_ADVANTECH_WDT is not set
1318# CONFIG_ALIM1535_WDT is not set
1319# CONFIG_ALIM7101_WDT is not set
1320# CONFIG_SC520_WDT is not set
1321# CONFIG_EUROTECH_WDT is not set
1322# CONFIG_IB700_WDT is not set
1323# CONFIG_IBMASR is not set
1324# CONFIG_WAFER_WDT is not set
1325# CONFIG_I6300ESB_WDT is not set
1326# CONFIG_ITCO_WDT is not set
1327# CONFIG_IT8712F_WDT is not set
1328# CONFIG_HP_WATCHDOG is not set
1329# CONFIG_SC1200_WDT is not set
1330# CONFIG_PC87413_WDT is not set
1331# CONFIG_60XX_WDT is not set
1332# CONFIG_SBC8360_WDT is not set
1333# CONFIG_SBC7240_WDT is not set
1334# CONFIG_CPU5_WDT is not set
1335# CONFIG_SMSC37B787_WDT is not set
1336# CONFIG_W83627HF_WDT is not set
1337# CONFIG_W83697HF_WDT is not set
1338# CONFIG_W83877F_WDT is not set
1339# CONFIG_W83977F_WDT is not set
1340# CONFIG_MACHZ_WDT is not set
1341# CONFIG_SBC_EPX_C3_WATCHDOG is not set
1342
1343#
1344# PCI-based Watchdog Cards
1345#
1346# CONFIG_PCIPCWATCHDOG is not set
1347# CONFIG_WDTPCI is not set
1348
1349#
1350# USB-based Watchdog Cards
1351#
1352# CONFIG_USBPCWATCHDOG is not set
1353
1354#
1355# Sonics Silicon Backplane
1356#
1357CONFIG_SSB_POSSIBLE=y
1358# CONFIG_SSB is not set
1003 1359
1004# 1360#
1005# Multifunction device drivers 1361# Multifunction device drivers
1006# 1362#
1007# CONFIG_MFD_SM501 is not set 1363# CONFIG_MFD_SM501 is not set
1364# CONFIG_HTC_PASIC3 is not set
1008 1365
1009# 1366#
1010# Multimedia devices 1367# Multimedia devices
1011# 1368#
1369
1370#
1371# Multimedia core support
1372#
1012# CONFIG_VIDEO_DEV is not set 1373# CONFIG_VIDEO_DEV is not set
1013# CONFIG_DVB_CORE is not set 1374# CONFIG_DVB_CORE is not set
1375
1376#
1377# Multimedia drivers
1378#
1014CONFIG_DAB=y 1379CONFIG_DAB=y
1015# CONFIG_USB_DABUSB is not set 1380# CONFIG_USB_DABUSB is not set
1016 1381
1017# 1382#
1018# Graphics support 1383# Graphics support
1019# 1384#
1020# CONFIG_BACKLIGHT_LCD_SUPPORT is not set 1385CONFIG_AGP=y
1386# CONFIG_AGP_ALI is not set
1387# CONFIG_AGP_ATI is not set
1388# CONFIG_AGP_AMD is not set
1389CONFIG_AGP_AMD64=y
1390CONFIG_AGP_INTEL=y
1391# CONFIG_AGP_NVIDIA is not set
1392# CONFIG_AGP_SIS is not set
1393# CONFIG_AGP_SWORKS is not set
1394# CONFIG_AGP_VIA is not set
1395# CONFIG_AGP_EFFICEON is not set
1396CONFIG_DRM=y
1397# CONFIG_DRM_TDFX is not set
1398# CONFIG_DRM_R128 is not set
1399# CONFIG_DRM_RADEON is not set
1400# CONFIG_DRM_I810 is not set
1401# CONFIG_DRM_I830 is not set
1402CONFIG_DRM_I915=y
1403# CONFIG_DRM_MGA is not set
1404# CONFIG_DRM_SIS is not set
1405# CONFIG_DRM_VIA is not set
1406# CONFIG_DRM_SAVAGE is not set
1407# CONFIG_VGASTATE is not set
1408# CONFIG_VIDEO_OUTPUT_CONTROL is not set
1409CONFIG_FB=y
1410# CONFIG_FIRMWARE_EDID is not set
1411# CONFIG_FB_DDC is not set
1412CONFIG_FB_CFB_FILLRECT=y
1413CONFIG_FB_CFB_COPYAREA=y
1414CONFIG_FB_CFB_IMAGEBLIT=y
1415# CONFIG_FB_CFB_REV_PIXELS_IN_BYTE is not set
1416# CONFIG_FB_SYS_FILLRECT is not set
1417# CONFIG_FB_SYS_COPYAREA is not set
1418# CONFIG_FB_SYS_IMAGEBLIT is not set
1419# CONFIG_FB_FOREIGN_ENDIAN is not set
1420# CONFIG_FB_SYS_FOPS is not set
1421CONFIG_FB_DEFERRED_IO=y
1422# CONFIG_FB_SVGALIB is not set
1423# CONFIG_FB_MACMODES is not set
1424# CONFIG_FB_BACKLIGHT is not set
1425CONFIG_FB_MODE_HELPERS=y
1426CONFIG_FB_TILEBLITTING=y
1427
1428#
1429# Frame buffer hardware drivers
1430#
1431# CONFIG_FB_CIRRUS is not set
1432# CONFIG_FB_PM2 is not set
1433# CONFIG_FB_CYBER2000 is not set
1434# CONFIG_FB_ARC is not set
1435# CONFIG_FB_ASILIANT is not set
1436# CONFIG_FB_IMSTT is not set
1437# CONFIG_FB_VGA16 is not set
1438# CONFIG_FB_UVESA is not set
1439# CONFIG_FB_VESA is not set
1440CONFIG_FB_EFI=y
1441# CONFIG_FB_IMAC is not set
1442# CONFIG_FB_N411 is not set
1443# CONFIG_FB_HGA is not set
1444# CONFIG_FB_S1D13XXX is not set
1445# CONFIG_FB_NVIDIA is not set
1446# CONFIG_FB_RIVA is not set
1447# CONFIG_FB_I810 is not set
1448# CONFIG_FB_LE80578 is not set
1449# CONFIG_FB_INTEL is not set
1450# CONFIG_FB_MATROX is not set
1451# CONFIG_FB_RADEON is not set
1452# CONFIG_FB_ATY128 is not set
1453# CONFIG_FB_ATY is not set
1454# CONFIG_FB_S3 is not set
1455# CONFIG_FB_SAVAGE is not set
1456# CONFIG_FB_SIS is not set
1457# CONFIG_FB_NEOMAGIC is not set
1458# CONFIG_FB_KYRO is not set
1459# CONFIG_FB_3DFX is not set
1460# CONFIG_FB_VOODOO1 is not set
1461# CONFIG_FB_VT8623 is not set
1462# CONFIG_FB_CYBLA is not set
1463# CONFIG_FB_TRIDENT is not set
1464# CONFIG_FB_ARK is not set
1465# CONFIG_FB_PM3 is not set
1466# CONFIG_FB_GEODE is not set
1467# CONFIG_FB_VIRTUAL is not set
1468CONFIG_BACKLIGHT_LCD_SUPPORT=y
1469# CONFIG_LCD_CLASS_DEVICE is not set
1470CONFIG_BACKLIGHT_CLASS_DEVICE=y
1471# CONFIG_BACKLIGHT_CORGI is not set
1472# CONFIG_BACKLIGHT_PROGEAR is not set
1021 1473
1022# 1474#
1023# Display device support 1475# Display device support
1024# 1476#
1025# CONFIG_DISPLAY_SUPPORT is not set 1477# CONFIG_DISPLAY_SUPPORT is not set
1026# CONFIG_VGASTATE is not set
1027# CONFIG_FB is not set
1028 1478
1029# 1479#
1030# Console display driver support 1480# Console display driver support
1031# 1481#
1032CONFIG_VGA_CONSOLE=y 1482CONFIG_VGA_CONSOLE=y
1033CONFIG_VGACON_SOFT_SCROLLBACK=y 1483CONFIG_VGACON_SOFT_SCROLLBACK=y
1034CONFIG_VGACON_SOFT_SCROLLBACK_SIZE=128 1484CONFIG_VGACON_SOFT_SCROLLBACK_SIZE=64
1035CONFIG_VIDEO_SELECT=y 1485CONFIG_VIDEO_SELECT=y
1036CONFIG_DUMMY_CONSOLE=y 1486CONFIG_DUMMY_CONSOLE=y
1487# CONFIG_FRAMEBUFFER_CONSOLE is not set
1488CONFIG_LOGO=y
1489# CONFIG_LOGO_LINUX_MONO is not set
1490# CONFIG_LOGO_LINUX_VGA16 is not set
1491CONFIG_LOGO_LINUX_CLUT224=y
1037 1492
1038# 1493#
1039# Sound 1494# Sound
@@ -1043,33 +1498,167 @@ CONFIG_SOUND=y
1043# 1498#
1044# Advanced Linux Sound Architecture 1499# Advanced Linux Sound Architecture
1045# 1500#
1046# CONFIG_SND is not set 1501CONFIG_SND=y
1502CONFIG_SND_TIMER=y
1503CONFIG_SND_PCM=y
1504CONFIG_SND_HWDEP=y
1505CONFIG_SND_SEQUENCER=y
1506CONFIG_SND_SEQ_DUMMY=y
1507CONFIG_SND_OSSEMUL=y
1508CONFIG_SND_MIXER_OSS=y
1509CONFIG_SND_PCM_OSS=y
1510CONFIG_SND_PCM_OSS_PLUGINS=y
1511CONFIG_SND_SEQUENCER_OSS=y
1512CONFIG_SND_DYNAMIC_MINORS=y
1513CONFIG_SND_SUPPORT_OLD_API=y
1514CONFIG_SND_VERBOSE_PROCFS=y
1515# CONFIG_SND_VERBOSE_PRINTK is not set
1516# CONFIG_SND_DEBUG is not set
1517CONFIG_SND_VMASTER=y
1518
1519#
1520# Generic devices
1521#
1522# CONFIG_SND_PCSP is not set
1523# CONFIG_SND_DUMMY is not set
1524# CONFIG_SND_VIRMIDI is not set
1525# CONFIG_SND_MTPAV is not set
1526# CONFIG_SND_SERIAL_U16550 is not set
1527# CONFIG_SND_MPU401 is not set
1528
1529#
1530# PCI devices
1531#
1532# CONFIG_SND_AD1889 is not set
1533# CONFIG_SND_ALS300 is not set
1534# CONFIG_SND_ALS4000 is not set
1535# CONFIG_SND_ALI5451 is not set
1536# CONFIG_SND_ATIIXP is not set
1537# CONFIG_SND_ATIIXP_MODEM is not set
1538# CONFIG_SND_AU8810 is not set
1539# CONFIG_SND_AU8820 is not set
1540# CONFIG_SND_AU8830 is not set
1541# CONFIG_SND_AW2 is not set
1542# CONFIG_SND_AZT3328 is not set
1543# CONFIG_SND_BT87X is not set
1544# CONFIG_SND_CA0106 is not set
1545# CONFIG_SND_CMIPCI is not set
1546# CONFIG_SND_OXYGEN is not set
1547# CONFIG_SND_CS4281 is not set
1548# CONFIG_SND_CS46XX is not set
1549# CONFIG_SND_CS5530 is not set
1550# CONFIG_SND_CS5535AUDIO is not set
1551# CONFIG_SND_DARLA20 is not set
1552# CONFIG_SND_GINA20 is not set
1553# CONFIG_SND_LAYLA20 is not set
1554# CONFIG_SND_DARLA24 is not set
1555# CONFIG_SND_GINA24 is not set
1556# CONFIG_SND_LAYLA24 is not set
1557# CONFIG_SND_MONA is not set
1558# CONFIG_SND_MIA is not set
1559# CONFIG_SND_ECHO3G is not set
1560# CONFIG_SND_INDIGO is not set
1561# CONFIG_SND_INDIGOIO is not set
1562# CONFIG_SND_INDIGODJ is not set
1563# CONFIG_SND_EMU10K1 is not set
1564# CONFIG_SND_EMU10K1X is not set
1565# CONFIG_SND_ENS1370 is not set
1566# CONFIG_SND_ENS1371 is not set
1567# CONFIG_SND_ES1938 is not set
1568# CONFIG_SND_ES1968 is not set
1569# CONFIG_SND_FM801 is not set
1570CONFIG_SND_HDA_INTEL=y
1571CONFIG_SND_HDA_HWDEP=y
1572CONFIG_SND_HDA_CODEC_REALTEK=y
1573CONFIG_SND_HDA_CODEC_ANALOG=y
1574CONFIG_SND_HDA_CODEC_SIGMATEL=y
1575CONFIG_SND_HDA_CODEC_VIA=y
1576CONFIG_SND_HDA_CODEC_ATIHDMI=y
1577CONFIG_SND_HDA_CODEC_CONEXANT=y
1578CONFIG_SND_HDA_CODEC_CMEDIA=y
1579CONFIG_SND_HDA_CODEC_SI3054=y
1580CONFIG_SND_HDA_GENERIC=y
1581# CONFIG_SND_HDA_POWER_SAVE is not set
1582# CONFIG_SND_HDSP is not set
1583# CONFIG_SND_HDSPM is not set
1584# CONFIG_SND_HIFIER is not set
1585# CONFIG_SND_ICE1712 is not set
1586# CONFIG_SND_ICE1724 is not set
1587# CONFIG_SND_INTEL8X0 is not set
1588# CONFIG_SND_INTEL8X0M is not set
1589# CONFIG_SND_KORG1212 is not set
1590# CONFIG_SND_MAESTRO3 is not set
1591# CONFIG_SND_MIXART is not set
1592# CONFIG_SND_NM256 is not set
1593# CONFIG_SND_PCXHR is not set
1594# CONFIG_SND_RIPTIDE is not set
1595# CONFIG_SND_RME32 is not set
1596# CONFIG_SND_RME96 is not set
1597# CONFIG_SND_RME9652 is not set
1598# CONFIG_SND_SIS7019 is not set
1599# CONFIG_SND_SONICVIBES is not set
1600# CONFIG_SND_TRIDENT is not set
1601# CONFIG_SND_VIA82XX is not set
1602# CONFIG_SND_VIA82XX_MODEM is not set
1603# CONFIG_SND_VIRTUOSO is not set
1604# CONFIG_SND_VX222 is not set
1605# CONFIG_SND_YMFPCI is not set
1606
1607#
1608# USB devices
1609#
1610# CONFIG_SND_USB_AUDIO is not set
1611# CONFIG_SND_USB_USX2Y is not set
1612# CONFIG_SND_USB_CAIAQ is not set
1613
1614#
1615# PCMCIA devices
1616#
1617# CONFIG_SND_VXPOCKET is not set
1618# CONFIG_SND_PDAUDIOCF is not set
1619
1620#
1621# System on Chip audio support
1622#
1623# CONFIG_SND_SOC is not set
1624
1625#
1626# ALSA SoC audio for Freescale SOCs
1627#
1628
1629#
1630# SoC Audio for the Texas Instruments OMAP
1631#
1047 1632
1048# 1633#
1049# Open Sound System 1634# Open Sound System
1050# 1635#
1051CONFIG_SOUND_PRIME=y 1636# CONFIG_SOUND_PRIME is not set
1052# CONFIG_SOUND_TRIDENT is not set
1053# CONFIG_SOUND_MSNDCLAS is not set
1054# CONFIG_SOUND_MSNDPIN is not set
1055# CONFIG_SOUND_OSS is not set
1056CONFIG_HID_SUPPORT=y 1637CONFIG_HID_SUPPORT=y
1057CONFIG_HID=y 1638CONFIG_HID=y
1058# CONFIG_HID_DEBUG is not set 1639CONFIG_HID_DEBUG=y
1640CONFIG_HIDRAW=y
1059 1641
1060# 1642#
1061# USB Input Devices 1643# USB Input Devices
1062# 1644#
1063CONFIG_USB_HID=y 1645CONFIG_USB_HID=y
1064# CONFIG_USB_HIDINPUT_POWERBOOK is not set 1646CONFIG_USB_HIDINPUT_POWERBOOK=y
1065# CONFIG_HID_FF is not set 1647CONFIG_HID_FF=y
1066# CONFIG_USB_HIDDEV is not set 1648CONFIG_HID_PID=y
1649CONFIG_LOGITECH_FF=y
1650# CONFIG_LOGIRUMBLEPAD2_FF is not set
1651CONFIG_PANTHERLORD_FF=y
1652CONFIG_THRUSTMASTER_FF=y
1653CONFIG_ZEROPLUS_FF=y
1654CONFIG_USB_HIDDEV=y
1067CONFIG_USB_SUPPORT=y 1655CONFIG_USB_SUPPORT=y
1068CONFIG_USB_ARCH_HAS_HCD=y 1656CONFIG_USB_ARCH_HAS_HCD=y
1069CONFIG_USB_ARCH_HAS_OHCI=y 1657CONFIG_USB_ARCH_HAS_OHCI=y
1070CONFIG_USB_ARCH_HAS_EHCI=y 1658CONFIG_USB_ARCH_HAS_EHCI=y
1071CONFIG_USB=y 1659CONFIG_USB=y
1072# CONFIG_USB_DEBUG is not set 1660CONFIG_USB_DEBUG=y
1661CONFIG_USB_ANNOUNCE_NEW_DEVICES=y
1073 1662
1074# 1663#
1075# Miscellaneous USB options 1664# Miscellaneous USB options
@@ -1077,18 +1666,18 @@ CONFIG_USB=y
1077CONFIG_USB_DEVICEFS=y 1666CONFIG_USB_DEVICEFS=y
1078# CONFIG_USB_DEVICE_CLASS is not set 1667# CONFIG_USB_DEVICE_CLASS is not set
1079# CONFIG_USB_DYNAMIC_MINORS is not set 1668# CONFIG_USB_DYNAMIC_MINORS is not set
1080# CONFIG_USB_SUSPEND is not set 1669CONFIG_USB_SUSPEND=y
1081# CONFIG_USB_PERSIST is not set
1082# CONFIG_USB_OTG is not set 1670# CONFIG_USB_OTG is not set
1083 1671
1084# 1672#
1085# USB Host Controller Drivers 1673# USB Host Controller Drivers
1086# 1674#
1675# CONFIG_USB_C67X00_HCD is not set
1087CONFIG_USB_EHCI_HCD=y 1676CONFIG_USB_EHCI_HCD=y
1088# CONFIG_USB_EHCI_SPLIT_ISO is not set
1089# CONFIG_USB_EHCI_ROOT_HUB_TT is not set 1677# CONFIG_USB_EHCI_ROOT_HUB_TT is not set
1090# CONFIG_USB_EHCI_TT_NEWSCHED is not set 1678# CONFIG_USB_EHCI_TT_NEWSCHED is not set
1091# CONFIG_USB_ISP116X_HCD is not set 1679# CONFIG_USB_ISP116X_HCD is not set
1680# CONFIG_USB_ISP1760_HCD is not set
1092CONFIG_USB_OHCI_HCD=y 1681CONFIG_USB_OHCI_HCD=y
1093# CONFIG_USB_OHCI_BIG_ENDIAN_DESC is not set 1682# CONFIG_USB_OHCI_BIG_ENDIAN_DESC is not set
1094# CONFIG_USB_OHCI_BIG_ENDIAN_MMIO is not set 1683# CONFIG_USB_OHCI_BIG_ENDIAN_MMIO is not set
@@ -1121,8 +1710,10 @@ CONFIG_USB_STORAGE=y
1121# CONFIG_USB_STORAGE_SDDR55 is not set 1710# CONFIG_USB_STORAGE_SDDR55 is not set
1122# CONFIG_USB_STORAGE_JUMPSHOT is not set 1711# CONFIG_USB_STORAGE_JUMPSHOT is not set
1123# CONFIG_USB_STORAGE_ALAUDA is not set 1712# CONFIG_USB_STORAGE_ALAUDA is not set
1713# CONFIG_USB_STORAGE_ONETOUCH is not set
1124# CONFIG_USB_STORAGE_KARMA is not set 1714# CONFIG_USB_STORAGE_KARMA is not set
1125# CONFIG_USB_LIBUSUAL is not set 1715# CONFIG_USB_STORAGE_CYPRESS_ATACB is not set
1716CONFIG_USB_LIBUSUAL=y
1126 1717
1127# 1718#
1128# USB Imaging devices 1719# USB Imaging devices
@@ -1134,10 +1725,6 @@ CONFIG_USB_MON=y
1134# 1725#
1135# USB port drivers 1726# USB port drivers
1136# 1727#
1137
1138#
1139# USB Serial Converter support
1140#
1141# CONFIG_USB_SERIAL is not set 1728# CONFIG_USB_SERIAL is not set
1142 1729
1143# 1730#
@@ -1163,90 +1750,125 @@ CONFIG_USB_MON=y
1163# CONFIG_USB_TRANCEVIBRATOR is not set 1750# CONFIG_USB_TRANCEVIBRATOR is not set
1164# CONFIG_USB_IOWARRIOR is not set 1751# CONFIG_USB_IOWARRIOR is not set
1165# CONFIG_USB_TEST is not set 1752# CONFIG_USB_TEST is not set
1753# CONFIG_USB_GADGET is not set
1754# CONFIG_MMC is not set
1755# CONFIG_MEMSTICK is not set
1756CONFIG_NEW_LEDS=y
1757CONFIG_LEDS_CLASS=y
1166 1758
1167# 1759#
1168# USB DSL modem support 1760# LED drivers
1169# 1761#
1762# CONFIG_LEDS_CLEVO_MAIL is not set
1170 1763
1171# 1764#
1172# USB Gadget Support 1765# LED Triggers
1173# 1766#
1174# CONFIG_USB_GADGET is not set 1767CONFIG_LEDS_TRIGGERS=y
1175# CONFIG_MMC is not set 1768# CONFIG_LEDS_TRIGGER_TIMER is not set
1769# CONFIG_LEDS_TRIGGER_HEARTBEAT is not set
1770# CONFIG_LEDS_TRIGGER_DEFAULT_ON is not set
1771# CONFIG_ACCESSIBILITY is not set
1772# CONFIG_INFINIBAND is not set
1773CONFIG_EDAC=y
1176 1774
1177# 1775#
1178# LED devices 1776# Reporting subsystems
1179# 1777#
1180# CONFIG_NEW_LEDS is not set 1778# CONFIG_EDAC_DEBUG is not set
1779# CONFIG_EDAC_MM_EDAC is not set
1780CONFIG_RTC_LIB=y
1781CONFIG_RTC_CLASS=y
1782# CONFIG_RTC_HCTOSYS is not set
1783# CONFIG_RTC_DEBUG is not set
1181 1784
1182# 1785#
1183# LED drivers 1786# RTC interfaces
1184# 1787#
1788CONFIG_RTC_INTF_SYSFS=y
1789CONFIG_RTC_INTF_PROC=y
1790CONFIG_RTC_INTF_DEV=y
1791# CONFIG_RTC_INTF_DEV_UIE_EMUL is not set
1792# CONFIG_RTC_DRV_TEST is not set
1185 1793
1186# 1794#
1187# LED Triggers 1795# I2C RTC drivers
1188# 1796#
1189# CONFIG_INFINIBAND is not set 1797# CONFIG_RTC_DRV_DS1307 is not set
1190# CONFIG_EDAC is not set 1798# CONFIG_RTC_DRV_DS1374 is not set
1799# CONFIG_RTC_DRV_DS1672 is not set
1800# CONFIG_RTC_DRV_MAX6900 is not set
1801# CONFIG_RTC_DRV_RS5C372 is not set
1802# CONFIG_RTC_DRV_ISL1208 is not set
1803# CONFIG_RTC_DRV_X1205 is not set
1804# CONFIG_RTC_DRV_PCF8563 is not set
1805# CONFIG_RTC_DRV_PCF8583 is not set
1806# CONFIG_RTC_DRV_M41T80 is not set
1807# CONFIG_RTC_DRV_S35390A is not set
1191 1808
1192# 1809#
1193# Real Time Clock 1810# SPI RTC drivers
1194# 1811#
1195# CONFIG_RTC_CLASS is not set
1196 1812
1197# 1813#
1198# DMA Engine support 1814# Platform RTC drivers
1199# 1815#
1200# CONFIG_DMA_ENGINE is not set 1816CONFIG_RTC_DRV_CMOS=y
1817# CONFIG_RTC_DRV_DS1511 is not set
1818# CONFIG_RTC_DRV_DS1553 is not set
1819# CONFIG_RTC_DRV_DS1742 is not set
1820# CONFIG_RTC_DRV_STK17TA8 is not set
1821# CONFIG_RTC_DRV_M48T86 is not set
1822# CONFIG_RTC_DRV_M48T59 is not set
1823# CONFIG_RTC_DRV_V3020 is not set
1201 1824
1202# 1825#
1203# DMA Clients 1826# on-CPU RTC drivers
1204# 1827#
1828CONFIG_DMADEVICES=y
1205 1829
1206# 1830#
1207# DMA Devices 1831# DMA Devices
1208# 1832#
1209CONFIG_VIRTUALIZATION=y 1833# CONFIG_INTEL_IOATDMA is not set
1210# CONFIG_KVM is not set 1834# CONFIG_UIO is not set
1211 1835
1212# 1836#
1213# Userspace I/O 1837# Firmware Drivers
1214# 1838#
1215# CONFIG_UIO is not set 1839# CONFIG_EDD is not set
1840CONFIG_EFI_VARS=y
1841# CONFIG_DELL_RBU is not set
1842# CONFIG_DCDBAS is not set
1843CONFIG_DMIID=y
1844# CONFIG_ISCSI_IBFT_FIND is not set
1216 1845
1217# 1846#
1218# File systems 1847# File systems
1219# 1848#
1220CONFIG_EXT2_FS=y 1849# CONFIG_EXT2_FS is not set
1221CONFIG_EXT2_FS_XATTR=y
1222CONFIG_EXT2_FS_POSIX_ACL=y
1223# CONFIG_EXT2_FS_SECURITY is not set
1224# CONFIG_EXT2_FS_XIP is not set
1225CONFIG_EXT3_FS=y 1850CONFIG_EXT3_FS=y
1226CONFIG_EXT3_FS_XATTR=y 1851CONFIG_EXT3_FS_XATTR=y
1227CONFIG_EXT3_FS_POSIX_ACL=y 1852CONFIG_EXT3_FS_POSIX_ACL=y
1228# CONFIG_EXT3_FS_SECURITY is not set 1853CONFIG_EXT3_FS_SECURITY=y
1229# CONFIG_EXT4DEV_FS is not set 1854# CONFIG_EXT4DEV_FS is not set
1230CONFIG_JBD=y 1855CONFIG_JBD=y
1231# CONFIG_JBD_DEBUG is not set 1856# CONFIG_JBD_DEBUG is not set
1232CONFIG_FS_MBCACHE=y 1857CONFIG_FS_MBCACHE=y
1233CONFIG_REISERFS_FS=y 1858# CONFIG_REISERFS_FS is not set
1234# CONFIG_REISERFS_CHECK is not set
1235# CONFIG_REISERFS_PROC_INFO is not set
1236CONFIG_REISERFS_FS_XATTR=y
1237CONFIG_REISERFS_FS_POSIX_ACL=y
1238# CONFIG_REISERFS_FS_SECURITY is not set
1239# CONFIG_JFS_FS is not set 1859# CONFIG_JFS_FS is not set
1240CONFIG_FS_POSIX_ACL=y 1860CONFIG_FS_POSIX_ACL=y
1241# CONFIG_XFS_FS is not set 1861# CONFIG_XFS_FS is not set
1242# CONFIG_GFS2_FS is not set
1243# CONFIG_OCFS2_FS is not set 1862# CONFIG_OCFS2_FS is not set
1244# CONFIG_MINIX_FS is not set 1863CONFIG_DNOTIFY=y
1245# CONFIG_ROMFS_FS is not set
1246CONFIG_INOTIFY=y 1864CONFIG_INOTIFY=y
1247CONFIG_INOTIFY_USER=y 1865CONFIG_INOTIFY_USER=y
1248# CONFIG_QUOTA is not set 1866CONFIG_QUOTA=y
1249CONFIG_DNOTIFY=y 1867CONFIG_QUOTA_NETLINK_INTERFACE=y
1868# CONFIG_PRINT_QUOTA_WARNING is not set
1869# CONFIG_QFMT_V1 is not set
1870CONFIG_QFMT_V2=y
1871CONFIG_QUOTACTL=y
1250# CONFIG_AUTOFS_FS is not set 1872# CONFIG_AUTOFS_FS is not set
1251CONFIG_AUTOFS4_FS=y 1873CONFIG_AUTOFS4_FS=y
1252# CONFIG_FUSE_FS is not set 1874# CONFIG_FUSE_FS is not set
@@ -1256,8 +1878,8 @@ CONFIG_GENERIC_ACL=y
1256# CD-ROM/DVD Filesystems 1878# CD-ROM/DVD Filesystems
1257# 1879#
1258CONFIG_ISO9660_FS=y 1880CONFIG_ISO9660_FS=y
1259# CONFIG_JOLIET is not set 1881CONFIG_JOLIET=y
1260# CONFIG_ZISOFS is not set 1882CONFIG_ZISOFS=y
1261# CONFIG_UDF_FS is not set 1883# CONFIG_UDF_FS is not set
1262 1884
1263# 1885#
@@ -1275,13 +1897,13 @@ CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1"
1275# 1897#
1276CONFIG_PROC_FS=y 1898CONFIG_PROC_FS=y
1277CONFIG_PROC_KCORE=y 1899CONFIG_PROC_KCORE=y
1900CONFIG_PROC_VMCORE=y
1278CONFIG_PROC_SYSCTL=y 1901CONFIG_PROC_SYSCTL=y
1279CONFIG_SYSFS=y 1902CONFIG_SYSFS=y
1280CONFIG_TMPFS=y 1903CONFIG_TMPFS=y
1281CONFIG_TMPFS_POSIX_ACL=y 1904CONFIG_TMPFS_POSIX_ACL=y
1282CONFIG_HUGETLBFS=y 1905CONFIG_HUGETLBFS=y
1283CONFIG_HUGETLB_PAGE=y 1906CONFIG_HUGETLB_PAGE=y
1284CONFIG_RAMFS=y
1285# CONFIG_CONFIGFS_FS is not set 1907# CONFIG_CONFIGFS_FS is not set
1286 1908
1287# 1909#
@@ -1289,6 +1911,7 @@ CONFIG_RAMFS=y
1289# 1911#
1290# CONFIG_ADFS_FS is not set 1912# CONFIG_ADFS_FS is not set
1291# CONFIG_AFFS_FS is not set 1913# CONFIG_AFFS_FS is not set
1914# CONFIG_ECRYPT_FS is not set
1292# CONFIG_HFS_FS is not set 1915# CONFIG_HFS_FS is not set
1293# CONFIG_HFSPLUS_FS is not set 1916# CONFIG_HFSPLUS_FS is not set
1294# CONFIG_BEFS_FS is not set 1917# CONFIG_BEFS_FS is not set
@@ -1296,33 +1919,15 @@ CONFIG_RAMFS=y
1296# CONFIG_EFS_FS is not set 1919# CONFIG_EFS_FS is not set
1297# CONFIG_CRAMFS is not set 1920# CONFIG_CRAMFS is not set
1298# CONFIG_VXFS_FS is not set 1921# CONFIG_VXFS_FS is not set
1922# CONFIG_MINIX_FS is not set
1299# CONFIG_HPFS_FS is not set 1923# CONFIG_HPFS_FS is not set
1300# CONFIG_QNX4FS_FS is not set 1924# CONFIG_QNX4FS_FS is not set
1925# CONFIG_ROMFS_FS is not set
1301# CONFIG_SYSV_FS is not set 1926# CONFIG_SYSV_FS is not set
1302# CONFIG_UFS_FS is not set 1927# CONFIG_UFS_FS is not set
1303 1928CONFIG_NETWORK_FILESYSTEMS=y
1304# 1929# CONFIG_NFS_FS is not set
1305# Network File Systems 1930# CONFIG_NFSD is not set
1306#
1307CONFIG_NFS_FS=y
1308CONFIG_NFS_V3=y
1309# CONFIG_NFS_V3_ACL is not set
1310# CONFIG_NFS_V4 is not set
1311# CONFIG_NFS_DIRECTIO is not set
1312CONFIG_NFSD=y
1313CONFIG_NFSD_V3=y
1314# CONFIG_NFSD_V3_ACL is not set
1315# CONFIG_NFSD_V4 is not set
1316CONFIG_NFSD_TCP=y
1317CONFIG_ROOT_NFS=y
1318CONFIG_LOCKD=y
1319CONFIG_LOCKD_V4=y
1320CONFIG_EXPORTFS=y
1321CONFIG_NFS_COMMON=y
1322CONFIG_SUNRPC=y
1323# CONFIG_SUNRPC_BIND34 is not set
1324# CONFIG_RPCSEC_GSS_KRB5 is not set
1325# CONFIG_RPCSEC_GSS_SPKM3 is not set
1326# CONFIG_SMB_FS is not set 1931# CONFIG_SMB_FS is not set
1327# CONFIG_CIFS is not set 1932# CONFIG_CIFS is not set
1328# CONFIG_NCP_FS is not set 1933# CONFIG_NCP_FS is not set
@@ -1332,14 +1937,26 @@ CONFIG_SUNRPC=y
1332# 1937#
1333# Partition Types 1938# Partition Types
1334# 1939#
1335# CONFIG_PARTITION_ADVANCED is not set 1940CONFIG_PARTITION_ADVANCED=y
1941# CONFIG_ACORN_PARTITION is not set
1942CONFIG_OSF_PARTITION=y
1943CONFIG_AMIGA_PARTITION=y
1944# CONFIG_ATARI_PARTITION is not set
1945CONFIG_MAC_PARTITION=y
1336CONFIG_MSDOS_PARTITION=y 1946CONFIG_MSDOS_PARTITION=y
1337 1947CONFIG_BSD_DISKLABEL=y
1338# 1948CONFIG_MINIX_SUBPARTITION=y
1339# Native Language Support 1949CONFIG_SOLARIS_X86_PARTITION=y
1340# 1950CONFIG_UNIXWARE_DISKLABEL=y
1951# CONFIG_LDM_PARTITION is not set
1952CONFIG_SGI_PARTITION=y
1953# CONFIG_ULTRIX_PARTITION is not set
1954CONFIG_SUN_PARTITION=y
1955CONFIG_KARMA_PARTITION=y
1956CONFIG_EFI_PARTITION=y
1957# CONFIG_SYSV68_PARTITION is not set
1341CONFIG_NLS=y 1958CONFIG_NLS=y
1342CONFIG_NLS_DEFAULT="iso8859-1" 1959CONFIG_NLS_DEFAULT="utf8"
1343CONFIG_NLS_CODEPAGE_437=y 1960CONFIG_NLS_CODEPAGE_437=y
1344# CONFIG_NLS_CODEPAGE_737 is not set 1961# CONFIG_NLS_CODEPAGE_737 is not set
1345# CONFIG_NLS_CODEPAGE_775 is not set 1962# CONFIG_NLS_CODEPAGE_775 is not set
@@ -1374,37 +1991,33 @@ CONFIG_NLS_ISO8859_1=y
1374# CONFIG_NLS_ISO8859_9 is not set 1991# CONFIG_NLS_ISO8859_9 is not set
1375# CONFIG_NLS_ISO8859_13 is not set 1992# CONFIG_NLS_ISO8859_13 is not set
1376# CONFIG_NLS_ISO8859_14 is not set 1993# CONFIG_NLS_ISO8859_14 is not set
1377CONFIG_NLS_ISO8859_15=y 1994# CONFIG_NLS_ISO8859_15 is not set
1378# CONFIG_NLS_KOI8_R is not set 1995# CONFIG_NLS_KOI8_R is not set
1379# CONFIG_NLS_KOI8_U is not set 1996# CONFIG_NLS_KOI8_U is not set
1380CONFIG_NLS_UTF8=y 1997CONFIG_NLS_UTF8=y
1381
1382#
1383# Distributed Lock Manager
1384#
1385# CONFIG_DLM is not set 1998# CONFIG_DLM is not set
1386CONFIG_INSTRUMENTATION=y
1387CONFIG_PROFILING=y
1388CONFIG_OPROFILE=y
1389CONFIG_KPROBES=y
1390 1999
1391# 2000#
1392# Kernel hacking 2001# Kernel hacking
1393# 2002#
1394CONFIG_TRACE_IRQFLAGS_SUPPORT=y 2003CONFIG_TRACE_IRQFLAGS_SUPPORT=y
1395# CONFIG_PRINTK_TIME is not set 2004# CONFIG_PRINTK_TIME is not set
2005# CONFIG_ENABLE_WARN_DEPRECATED is not set
1396# CONFIG_ENABLE_MUST_CHECK is not set 2006# CONFIG_ENABLE_MUST_CHECK is not set
2007CONFIG_FRAME_WARN=2048
1397CONFIG_MAGIC_SYSRQ=y 2008CONFIG_MAGIC_SYSRQ=y
1398CONFIG_UNUSED_SYMBOLS=y 2009# CONFIG_UNUSED_SYMBOLS is not set
1399# CONFIG_DEBUG_FS is not set 2010CONFIG_DEBUG_FS=y
1400# CONFIG_HEADERS_CHECK is not set 2011# CONFIG_HEADERS_CHECK is not set
1401CONFIG_DEBUG_KERNEL=y 2012CONFIG_DEBUG_KERNEL=y
1402# CONFIG_DEBUG_SHIRQ is not set 2013# CONFIG_DEBUG_SHIRQ is not set
1403CONFIG_DETECT_SOFTLOCKUP=y 2014# CONFIG_DETECT_SOFTLOCKUP is not set
1404# CONFIG_SCHED_DEBUG is not set 2015# CONFIG_SCHED_DEBUG is not set
1405# CONFIG_SCHEDSTATS is not set 2016CONFIG_SCHEDSTATS=y
1406CONFIG_TIMER_STATS=y 2017CONFIG_TIMER_STATS=y
2018# CONFIG_DEBUG_OBJECTS is not set
1407# CONFIG_SLUB_DEBUG_ON is not set 2019# CONFIG_SLUB_DEBUG_ON is not set
2020# CONFIG_SLUB_STATS is not set
1408# CONFIG_DEBUG_RT_MUTEXES is not set 2021# CONFIG_DEBUG_RT_MUTEXES is not set
1409# CONFIG_RT_MUTEX_TESTER is not set 2022# CONFIG_RT_MUTEX_TESTER is not set
1410# CONFIG_DEBUG_SPINLOCK is not set 2023# CONFIG_DEBUG_SPINLOCK is not set
@@ -1419,48 +2032,174 @@ CONFIG_TIMER_STATS=y
1419CONFIG_DEBUG_BUGVERBOSE=y 2032CONFIG_DEBUG_BUGVERBOSE=y
1420# CONFIG_DEBUG_INFO is not set 2033# CONFIG_DEBUG_INFO is not set
1421# CONFIG_DEBUG_VM is not set 2034# CONFIG_DEBUG_VM is not set
2035# CONFIG_DEBUG_WRITECOUNT is not set
1422# CONFIG_DEBUG_LIST is not set 2036# CONFIG_DEBUG_LIST is not set
1423# CONFIG_FRAME_POINTER is not set 2037# CONFIG_DEBUG_SG is not set
1424CONFIG_OPTIMIZE_INLINING=y 2038CONFIG_FRAME_POINTER=y
2039# CONFIG_BOOT_PRINTK_DELAY is not set
1425# CONFIG_RCU_TORTURE_TEST is not set 2040# CONFIG_RCU_TORTURE_TEST is not set
2041# CONFIG_KPROBES_SANITY_TEST is not set
2042# CONFIG_BACKTRACE_SELF_TEST is not set
1426# CONFIG_LKDTM is not set 2043# CONFIG_LKDTM is not set
1427# CONFIG_FAULT_INJECTION is not set 2044# CONFIG_FAULT_INJECTION is not set
2045# CONFIG_LATENCYTOP is not set
2046CONFIG_PROVIDE_OHCI1394_DMA_INIT=y
2047# CONFIG_SAMPLES is not set
2048# CONFIG_KGDB is not set
2049CONFIG_HAVE_ARCH_KGDB=y
2050# CONFIG_NONPROMISC_DEVMEM is not set
1428CONFIG_EARLY_PRINTK=y 2051CONFIG_EARLY_PRINTK=y
1429CONFIG_DEBUG_STACKOVERFLOW=y 2052CONFIG_DEBUG_STACKOVERFLOW=y
1430# CONFIG_DEBUG_STACK_USAGE is not set 2053CONFIG_DEBUG_STACK_USAGE=y
1431# CONFIG_DEBUG_RODATA is not set 2054# CONFIG_DEBUG_PAGEALLOC is not set
2055# CONFIG_X86_PTDUMP is not set
2056CONFIG_DEBUG_RODATA=y
2057# CONFIG_DEBUG_RODATA_TEST is not set
2058CONFIG_DEBUG_NX_TEST=m
1432# CONFIG_4KSTACKS is not set 2059# CONFIG_4KSTACKS is not set
1433CONFIG_X86_FIND_SMP_CONFIG=y 2060CONFIG_X86_FIND_SMP_CONFIG=y
1434CONFIG_X86_MPPARSE=y 2061CONFIG_X86_MPPARSE=y
1435CONFIG_DOUBLEFAULT=y 2062CONFIG_DOUBLEFAULT=y
2063CONFIG_IO_DELAY_TYPE_0X80=0
2064CONFIG_IO_DELAY_TYPE_0XED=1
2065CONFIG_IO_DELAY_TYPE_UDELAY=2
2066CONFIG_IO_DELAY_TYPE_NONE=3
2067CONFIG_IO_DELAY_0X80=y
2068# CONFIG_IO_DELAY_0XED is not set
2069# CONFIG_IO_DELAY_UDELAY is not set
2070# CONFIG_IO_DELAY_NONE is not set
2071CONFIG_DEFAULT_IO_DELAY_TYPE=0
2072CONFIG_DEBUG_BOOT_PARAMS=y
2073# CONFIG_CPA_DEBUG is not set
1436 2074
1437# 2075#
1438# Security options 2076# Security options
1439# 2077#
1440# CONFIG_KEYS is not set 2078CONFIG_KEYS=y
1441# CONFIG_SECURITY is not set 2079CONFIG_KEYS_DEBUG_PROC_KEYS=y
1442# CONFIG_CRYPTO is not set 2080CONFIG_SECURITY=y
2081CONFIG_SECURITY_NETWORK=y
2082# CONFIG_SECURITY_NETWORK_XFRM is not set
2083CONFIG_SECURITY_CAPABILITIES=y
2084CONFIG_SECURITY_FILE_CAPABILITIES=y
2085# CONFIG_SECURITY_ROOTPLUG is not set
2086CONFIG_SECURITY_DEFAULT_MMAP_MIN_ADDR=65536
2087CONFIG_SECURITY_SELINUX=y
2088CONFIG_SECURITY_SELINUX_BOOTPARAM=y
2089CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=1
2090CONFIG_SECURITY_SELINUX_DISABLE=y
2091CONFIG_SECURITY_SELINUX_DEVELOP=y
2092CONFIG_SECURITY_SELINUX_AVC_STATS=y
2093CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE=1
2094# CONFIG_SECURITY_SELINUX_ENABLE_SECMARK_DEFAULT is not set
2095# CONFIG_SECURITY_SELINUX_POLICYDB_VERSION_MAX is not set
2096# CONFIG_SECURITY_SMACK is not set
2097CONFIG_CRYPTO=y
2098
2099#
2100# Crypto core or helper
2101#
2102CONFIG_CRYPTO_ALGAPI=y
2103CONFIG_CRYPTO_AEAD=y
2104CONFIG_CRYPTO_BLKCIPHER=y
2105CONFIG_CRYPTO_HASH=y
2106CONFIG_CRYPTO_MANAGER=y
2107# CONFIG_CRYPTO_GF128MUL is not set
2108# CONFIG_CRYPTO_NULL is not set
2109# CONFIG_CRYPTO_CRYPTD is not set
2110CONFIG_CRYPTO_AUTHENC=y
2111# CONFIG_CRYPTO_TEST is not set
2112
2113#
2114# Authenticated Encryption with Associated Data
2115#
2116# CONFIG_CRYPTO_CCM is not set
2117# CONFIG_CRYPTO_GCM is not set
2118# CONFIG_CRYPTO_SEQIV is not set
2119
2120#
2121# Block modes
2122#
2123CONFIG_CRYPTO_CBC=y
2124# CONFIG_CRYPTO_CTR is not set
2125# CONFIG_CRYPTO_CTS is not set
2126CONFIG_CRYPTO_ECB=y
2127# CONFIG_CRYPTO_LRW is not set
2128# CONFIG_CRYPTO_PCBC is not set
2129# CONFIG_CRYPTO_XTS is not set
2130
2131#
2132# Hash modes
2133#
2134CONFIG_CRYPTO_HMAC=y
2135# CONFIG_CRYPTO_XCBC is not set
2136
2137#
2138# Digest
2139#
2140# CONFIG_CRYPTO_CRC32C is not set
2141# CONFIG_CRYPTO_MD4 is not set
2142CONFIG_CRYPTO_MD5=y
2143# CONFIG_CRYPTO_MICHAEL_MIC is not set
2144CONFIG_CRYPTO_SHA1=y
2145# CONFIG_CRYPTO_SHA256 is not set
2146# CONFIG_CRYPTO_SHA512 is not set
2147# CONFIG_CRYPTO_TGR192 is not set
2148# CONFIG_CRYPTO_WP512 is not set
2149
2150#
2151# Ciphers
2152#
2153CONFIG_CRYPTO_AES=y
2154# CONFIG_CRYPTO_AES_586 is not set
2155# CONFIG_CRYPTO_ANUBIS is not set
2156CONFIG_CRYPTO_ARC4=y
2157# CONFIG_CRYPTO_BLOWFISH is not set
2158# CONFIG_CRYPTO_CAMELLIA is not set
2159# CONFIG_CRYPTO_CAST5 is not set
2160# CONFIG_CRYPTO_CAST6 is not set
2161CONFIG_CRYPTO_DES=y
2162# CONFIG_CRYPTO_FCRYPT is not set
2163# CONFIG_CRYPTO_KHAZAD is not set
2164# CONFIG_CRYPTO_SALSA20 is not set
2165# CONFIG_CRYPTO_SALSA20_586 is not set
2166# CONFIG_CRYPTO_SEED is not set
2167# CONFIG_CRYPTO_SERPENT is not set
2168# CONFIG_CRYPTO_TEA is not set
2169# CONFIG_CRYPTO_TWOFISH is not set
2170# CONFIG_CRYPTO_TWOFISH_586 is not set
2171
2172#
2173# Compression
2174#
2175# CONFIG_CRYPTO_DEFLATE is not set
2176# CONFIG_CRYPTO_LZO is not set
2177CONFIG_CRYPTO_HW=y
2178# CONFIG_CRYPTO_DEV_PADLOCK is not set
2179# CONFIG_CRYPTO_DEV_GEODE is not set
2180# CONFIG_CRYPTO_DEV_HIFN_795X is not set
2181CONFIG_HAVE_KVM=y
2182CONFIG_VIRTUALIZATION=y
2183# CONFIG_KVM is not set
2184# CONFIG_LGUEST is not set
2185# CONFIG_VIRTIO_PCI is not set
2186# CONFIG_VIRTIO_BALLOON is not set
1443 2187
1444# 2188#
1445# Library routines 2189# Library routines
1446# 2190#
1447CONFIG_BITREVERSE=y 2191CONFIG_BITREVERSE=y
2192CONFIG_GENERIC_FIND_FIRST_BIT=y
2193CONFIG_GENERIC_FIND_NEXT_BIT=y
1448# CONFIG_CRC_CCITT is not set 2194# CONFIG_CRC_CCITT is not set
1449# CONFIG_CRC16 is not set 2195# CONFIG_CRC16 is not set
1450# CONFIG_CRC_ITU_T is not set 2196# CONFIG_CRC_ITU_T is not set
1451CONFIG_CRC32=y 2197CONFIG_CRC32=y
1452# CONFIG_CRC7 is not set 2198# CONFIG_CRC7 is not set
1453# CONFIG_LIBCRC32C is not set 2199# CONFIG_LIBCRC32C is not set
2200CONFIG_AUDIT_GENERIC=y
1454CONFIG_ZLIB_INFLATE=y 2201CONFIG_ZLIB_INFLATE=y
1455CONFIG_PLIST=y 2202CONFIG_PLIST=y
1456CONFIG_HAS_IOMEM=y 2203CONFIG_HAS_IOMEM=y
1457CONFIG_HAS_IOPORT=y 2204CONFIG_HAS_IOPORT=y
1458CONFIG_HAS_DMA=y 2205CONFIG_HAS_DMA=y
1459CONFIG_GENERIC_HARDIRQS=y
1460CONFIG_GENERIC_IRQ_PROBE=y
1461CONFIG_GENERIC_PENDING_IRQ=y
1462CONFIG_X86_SMP=y
1463CONFIG_X86_HT=y
1464CONFIG_X86_BIOS_REBOOT=y
1465CONFIG_X86_TRAMPOLINE=y
1466CONFIG_KTIME_SCALAR=y
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index 2d6f5b2809d2..ae5124e064d4 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -1,64 +1,103 @@
1# 1#
2# Automatically generated make config: don't edit 2# Automatically generated make config: don't edit
3# Linux kernel version: 2.6.22-git14 3# Linux kernel version: 2.6.26-rc1
4# Fri Jul 20 09:53:15 2007 4# Sun May 4 19:59:57 2008
5# 5#
6CONFIG_X86_64=y
7CONFIG_64BIT=y 6CONFIG_64BIT=y
7# CONFIG_X86_32 is not set
8CONFIG_X86_64=y
8CONFIG_X86=y 9CONFIG_X86=y
10CONFIG_DEFCONFIG_LIST="arch/x86/configs/x86_64_defconfig"
11# CONFIG_GENERIC_LOCKBREAK is not set
9CONFIG_GENERIC_TIME=y 12CONFIG_GENERIC_TIME=y
10CONFIG_GENERIC_TIME_VSYSCALL=y
11CONFIG_GENERIC_CMOS_UPDATE=y 13CONFIG_GENERIC_CMOS_UPDATE=y
12CONFIG_ZONE_DMA32=y 14CONFIG_CLOCKSOURCE_WATCHDOG=y
15CONFIG_GENERIC_CLOCKEVENTS=y
16CONFIG_GENERIC_CLOCKEVENTS_BROADCAST=y
13CONFIG_LOCKDEP_SUPPORT=y 17CONFIG_LOCKDEP_SUPPORT=y
14CONFIG_STACKTRACE_SUPPORT=y 18CONFIG_STACKTRACE_SUPPORT=y
15CONFIG_SEMAPHORE_SLEEPERS=y 19CONFIG_HAVE_LATENCYTOP_SUPPORT=y
20CONFIG_FAST_CMPXCHG_LOCAL=y
16CONFIG_MMU=y 21CONFIG_MMU=y
17CONFIG_ZONE_DMA=y 22CONFIG_ZONE_DMA=y
18CONFIG_QUICKLIST=y
19CONFIG_NR_QUICK=2
20CONFIG_RWSEM_GENERIC_SPINLOCK=y
21CONFIG_GENERIC_HWEIGHT=y
22CONFIG_GENERIC_CALIBRATE_DELAY=y
23CONFIG_X86_CMPXCHG=y
24CONFIG_EARLY_PRINTK=y
25CONFIG_GENERIC_ISA_DMA=y 23CONFIG_GENERIC_ISA_DMA=y
26CONFIG_GENERIC_IOMAP=y 24CONFIG_GENERIC_IOMAP=y
27CONFIG_ARCH_MAY_HAVE_PC_FDC=y
28CONFIG_ARCH_POPULATES_NODE_MAP=y
29CONFIG_DMI=y
30CONFIG_AUDIT_ARCH=y
31CONFIG_GENERIC_BUG=y 25CONFIG_GENERIC_BUG=y
26CONFIG_GENERIC_HWEIGHT=y
27# CONFIG_GENERIC_GPIO is not set
28CONFIG_ARCH_MAY_HAVE_PC_FDC=y
29CONFIG_RWSEM_GENERIC_SPINLOCK=y
30# CONFIG_RWSEM_XCHGADD_ALGORITHM is not set
32# CONFIG_ARCH_HAS_ILOG2_U32 is not set 31# CONFIG_ARCH_HAS_ILOG2_U32 is not set
33# CONFIG_ARCH_HAS_ILOG2_U64 is not set 32# CONFIG_ARCH_HAS_ILOG2_U64 is not set
34CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" 33CONFIG_ARCH_HAS_CPU_IDLE_WAIT=y
34CONFIG_GENERIC_CALIBRATE_DELAY=y
35CONFIG_GENERIC_TIME_VSYSCALL=y
36CONFIG_ARCH_HAS_CPU_RELAX=y
37CONFIG_ARCH_HAS_CACHE_LINE_SIZE=y
38CONFIG_HAVE_SETUP_PER_CPU_AREA=y
39CONFIG_HAVE_CPUMASK_OF_CPU_MAP=y
40CONFIG_ARCH_HIBERNATION_POSSIBLE=y
41CONFIG_ARCH_SUSPEND_POSSIBLE=y
42CONFIG_ZONE_DMA32=y
43CONFIG_ARCH_POPULATES_NODE_MAP=y
44CONFIG_AUDIT_ARCH=y
45CONFIG_ARCH_SUPPORTS_AOUT=y
46CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING=y
47CONFIG_GENERIC_HARDIRQS=y
48CONFIG_GENERIC_IRQ_PROBE=y
49CONFIG_GENERIC_PENDING_IRQ=y
50CONFIG_X86_SMP=y
51CONFIG_X86_64_SMP=y
52CONFIG_X86_HT=y
53CONFIG_X86_BIOS_REBOOT=y
54CONFIG_X86_TRAMPOLINE=y
55# CONFIG_KTIME_SCALAR is not set
35 56
36# 57#
37# Code maturity level options 58# General setup
38# 59#
39CONFIG_EXPERIMENTAL=y 60CONFIG_EXPERIMENTAL=y
40CONFIG_LOCK_KERNEL=y 61CONFIG_LOCK_KERNEL=y
41CONFIG_INIT_ENV_ARG_LIMIT=32 62CONFIG_INIT_ENV_ARG_LIMIT=32
42
43#
44# General setup
45#
46CONFIG_LOCALVERSION="" 63CONFIG_LOCALVERSION=""
47CONFIG_LOCALVERSION_AUTO=y 64# CONFIG_LOCALVERSION_AUTO is not set
48CONFIG_SWAP=y 65CONFIG_SWAP=y
49CONFIG_SYSVIPC=y 66CONFIG_SYSVIPC=y
50CONFIG_SYSVIPC_SYSCTL=y 67CONFIG_SYSVIPC_SYSCTL=y
51CONFIG_POSIX_MQUEUE=y 68CONFIG_POSIX_MQUEUE=y
52# CONFIG_BSD_PROCESS_ACCT is not set 69CONFIG_BSD_PROCESS_ACCT=y
53# CONFIG_TASKSTATS is not set 70# CONFIG_BSD_PROCESS_ACCT_V3 is not set
54# CONFIG_USER_NS is not set 71CONFIG_TASKSTATS=y
55# CONFIG_AUDIT is not set 72CONFIG_TASK_DELAY_ACCT=y
56CONFIG_IKCONFIG=y 73CONFIG_TASK_XACCT=y
57CONFIG_IKCONFIG_PROC=y 74CONFIG_TASK_IO_ACCOUNTING=y
58CONFIG_LOG_BUF_SHIFT=18 75CONFIG_AUDIT=y
59# CONFIG_CPUSETS is not set 76CONFIG_AUDITSYSCALL=y
60CONFIG_SYSFS_DEPRECATED=y 77CONFIG_AUDIT_TREE=y
78# CONFIG_IKCONFIG is not set
79CONFIG_LOG_BUF_SHIFT=17
80CONFIG_CGROUPS=y
81# CONFIG_CGROUP_DEBUG is not set
82CONFIG_CGROUP_NS=y
83# CONFIG_CGROUP_DEVICE is not set
84CONFIG_CPUSETS=y
85CONFIG_GROUP_SCHED=y
86CONFIG_FAIR_GROUP_SCHED=y
87# CONFIG_RT_GROUP_SCHED is not set
88# CONFIG_USER_SCHED is not set
89CONFIG_CGROUP_SCHED=y
90CONFIG_CGROUP_CPUACCT=y
91CONFIG_RESOURCE_COUNTERS=y
92# CONFIG_CGROUP_MEM_RES_CTLR is not set
93# CONFIG_SYSFS_DEPRECATED_V2 is not set
94CONFIG_PROC_PID_CPUSET=y
61CONFIG_RELAY=y 95CONFIG_RELAY=y
96CONFIG_NAMESPACES=y
97CONFIG_UTS_NS=y
98CONFIG_IPC_NS=y
99CONFIG_USER_NS=y
100CONFIG_PID_NS=y
62CONFIG_BLK_DEV_INITRD=y 101CONFIG_BLK_DEV_INITRD=y
63CONFIG_INITRAMFS_SOURCE="" 102CONFIG_INITRAMFS_SOURCE=""
64CONFIG_CC_OPTIMIZE_FOR_SIZE=y 103CONFIG_CC_OPTIMIZE_FOR_SIZE=y
@@ -66,13 +105,15 @@ CONFIG_SYSCTL=y
66# CONFIG_EMBEDDED is not set 105# CONFIG_EMBEDDED is not set
67CONFIG_UID16=y 106CONFIG_UID16=y
68CONFIG_SYSCTL_SYSCALL=y 107CONFIG_SYSCTL_SYSCALL=y
108CONFIG_SYSCTL_SYSCALL_CHECK=y
69CONFIG_KALLSYMS=y 109CONFIG_KALLSYMS=y
70CONFIG_KALLSYMS_ALL=y 110CONFIG_KALLSYMS_ALL=y
71# CONFIG_KALLSYMS_EXTRA_PASS is not set 111CONFIG_KALLSYMS_EXTRA_PASS=y
72CONFIG_HOTPLUG=y 112CONFIG_HOTPLUG=y
73CONFIG_PRINTK=y 113CONFIG_PRINTK=y
74CONFIG_BUG=y 114CONFIG_BUG=y
75CONFIG_ELF_CORE=y 115CONFIG_ELF_CORE=y
116# CONFIG_COMPAT_BRK is not set
76CONFIG_BASE_FULL=y 117CONFIG_BASE_FULL=y
77CONFIG_FUTEX=y 118CONFIG_FUTEX=y
78CONFIG_ANON_INODES=y 119CONFIG_ANON_INODES=y
@@ -82,9 +123,21 @@ CONFIG_TIMERFD=y
82CONFIG_EVENTFD=y 123CONFIG_EVENTFD=y
83CONFIG_SHMEM=y 124CONFIG_SHMEM=y
84CONFIG_VM_EVENT_COUNTERS=y 125CONFIG_VM_EVENT_COUNTERS=y
85CONFIG_SLAB=y 126CONFIG_SLUB_DEBUG=y
86# CONFIG_SLUB is not set 127# CONFIG_SLAB is not set
128CONFIG_SLUB=y
87# CONFIG_SLOB is not set 129# CONFIG_SLOB is not set
130CONFIG_PROFILING=y
131CONFIG_MARKERS=y
132# CONFIG_OPROFILE is not set
133CONFIG_HAVE_OPROFILE=y
134CONFIG_KPROBES=y
135CONFIG_KRETPROBES=y
136CONFIG_HAVE_KPROBES=y
137CONFIG_HAVE_KRETPROBES=y
138# CONFIG_HAVE_DMA_ATTRS is not set
139CONFIG_PROC_PAGE_MONITOR=y
140CONFIG_SLABINFO=y
88CONFIG_RT_MUTEXES=y 141CONFIG_RT_MUTEXES=y
89# CONFIG_TINY_SHMEM is not set 142# CONFIG_TINY_SHMEM is not set
90CONFIG_BASE_SMALL=0 143CONFIG_BASE_SMALL=0
@@ -96,14 +149,15 @@ CONFIG_MODULE_FORCE_UNLOAD=y
96# CONFIG_KMOD is not set 149# CONFIG_KMOD is not set
97CONFIG_STOP_MACHINE=y 150CONFIG_STOP_MACHINE=y
98CONFIG_BLOCK=y 151CONFIG_BLOCK=y
99# CONFIG_BLK_DEV_IO_TRACE is not set 152CONFIG_BLK_DEV_IO_TRACE=y
100# CONFIG_BLK_DEV_BSG is not set 153CONFIG_BLK_DEV_BSG=y
154CONFIG_BLOCK_COMPAT=y
101 155
102# 156#
103# IO Schedulers 157# IO Schedulers
104# 158#
105CONFIG_IOSCHED_NOOP=y 159CONFIG_IOSCHED_NOOP=y
106# CONFIG_IOSCHED_AS is not set 160CONFIG_IOSCHED_AS=y
107CONFIG_IOSCHED_DEADLINE=y 161CONFIG_IOSCHED_DEADLINE=y
108CONFIG_IOSCHED_CFQ=y 162CONFIG_IOSCHED_CFQ=y
109# CONFIG_DEFAULT_AS is not set 163# CONFIG_DEFAULT_AS is not set
@@ -111,107 +165,177 @@ CONFIG_IOSCHED_CFQ=y
111CONFIG_DEFAULT_CFQ=y 165CONFIG_DEFAULT_CFQ=y
112# CONFIG_DEFAULT_NOOP is not set 166# CONFIG_DEFAULT_NOOP is not set
113CONFIG_DEFAULT_IOSCHED="cfq" 167CONFIG_DEFAULT_IOSCHED="cfq"
168CONFIG_CLASSIC_RCU=y
114 169
115# 170#
116# Processor type and features 171# Processor type and features
117# 172#
173CONFIG_TICK_ONESHOT=y
174CONFIG_NO_HZ=y
175CONFIG_HIGH_RES_TIMERS=y
176CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
177CONFIG_SMP=y
118CONFIG_X86_PC=y 178CONFIG_X86_PC=y
179# CONFIG_X86_ELAN is not set
180# CONFIG_X86_VOYAGER is not set
181# CONFIG_X86_NUMAQ is not set
182# CONFIG_X86_SUMMIT is not set
183# CONFIG_X86_BIGSMP is not set
184# CONFIG_X86_VISWS is not set
185# CONFIG_X86_GENERICARCH is not set
186# CONFIG_X86_ES7000 is not set
187# CONFIG_X86_RDC321X is not set
119# CONFIG_X86_VSMP is not set 188# CONFIG_X86_VSMP is not set
189# CONFIG_PARAVIRT_GUEST is not set
190CONFIG_MEMTEST_BOOTPARAM=y
191CONFIG_MEMTEST_BOOTPARAM_VALUE=0
192# CONFIG_M386 is not set
193# CONFIG_M486 is not set
194# CONFIG_M586 is not set
195# CONFIG_M586TSC is not set
196# CONFIG_M586MMX is not set
197# CONFIG_M686 is not set
198# CONFIG_MPENTIUMII is not set
199# CONFIG_MPENTIUMIII is not set
200# CONFIG_MPENTIUMM is not set
201# CONFIG_MPENTIUM4 is not set
202# CONFIG_MK6 is not set
203# CONFIG_MK7 is not set
120# CONFIG_MK8 is not set 204# CONFIG_MK8 is not set
205# CONFIG_MCRUSOE is not set
206# CONFIG_MEFFICEON is not set
207# CONFIG_MWINCHIPC6 is not set
208# CONFIG_MWINCHIP2 is not set
209# CONFIG_MWINCHIP3D is not set
210# CONFIG_MGEODEGX1 is not set
211# CONFIG_MGEODE_LX is not set
212# CONFIG_MCYRIXIII is not set
213# CONFIG_MVIAC3_2 is not set
214# CONFIG_MVIAC7 is not set
121# CONFIG_MPSC is not set 215# CONFIG_MPSC is not set
122# CONFIG_MCORE2 is not set 216CONFIG_MCORE2=y
123CONFIG_GENERIC_CPU=y 217# CONFIG_GENERIC_CPU is not set
124CONFIG_X86_L1_CACHE_BYTES=128 218CONFIG_X86_CPU=y
125CONFIG_X86_L1_CACHE_SHIFT=7 219CONFIG_X86_L1_CACHE_BYTES=64
126CONFIG_X86_INTERNODE_CACHE_BYTES=128 220CONFIG_X86_INTERNODE_CACHE_BYTES=64
127CONFIG_X86_TSC=y 221CONFIG_X86_CMPXCHG=y
222CONFIG_X86_L1_CACHE_SHIFT=6
128CONFIG_X86_GOOD_APIC=y 223CONFIG_X86_GOOD_APIC=y
129# CONFIG_MICROCODE is not set 224CONFIG_X86_INTEL_USERCOPY=y
130CONFIG_X86_MSR=y 225CONFIG_X86_USE_PPRO_CHECKSUM=y
131CONFIG_X86_CPUID=y 226CONFIG_X86_P6_NOP=y
132CONFIG_X86_HT=y 227CONFIG_X86_TSC=y
133CONFIG_X86_IO_APIC=y 228CONFIG_X86_CMOV=y
134CONFIG_X86_LOCAL_APIC=y 229CONFIG_X86_MINIMUM_CPU_FAMILY=64
135CONFIG_MTRR=y 230CONFIG_X86_DEBUGCTLMSR=y
136CONFIG_SMP=y 231CONFIG_HPET_TIMER=y
137CONFIG_SCHED_SMT=y 232CONFIG_HPET_EMULATE_RTC=y
233CONFIG_DMI=y
234CONFIG_GART_IOMMU=y
235CONFIG_CALGARY_IOMMU=y
236CONFIG_CALGARY_IOMMU_ENABLED_BY_DEFAULT=y
237CONFIG_SWIOTLB=y
238CONFIG_IOMMU_HELPER=y
239CONFIG_NR_CPUS=4
240# CONFIG_SCHED_SMT is not set
138CONFIG_SCHED_MC=y 241CONFIG_SCHED_MC=y
139# CONFIG_PREEMPT_NONE is not set 242# CONFIG_PREEMPT_NONE is not set
140CONFIG_PREEMPT_VOLUNTARY=y 243CONFIG_PREEMPT_VOLUNTARY=y
141# CONFIG_PREEMPT is not set 244# CONFIG_PREEMPT is not set
142CONFIG_PREEMPT_BKL=y 245CONFIG_X86_LOCAL_APIC=y
246CONFIG_X86_IO_APIC=y
247# CONFIG_X86_MCE is not set
248# CONFIG_I8K is not set
249# CONFIG_MICROCODE is not set
250CONFIG_X86_MSR=y
251CONFIG_X86_CPUID=y
143CONFIG_NUMA=y 252CONFIG_NUMA=y
144CONFIG_K8_NUMA=y 253CONFIG_K8_NUMA=y
145CONFIG_NODES_SHIFT=6
146CONFIG_X86_64_ACPI_NUMA=y 254CONFIG_X86_64_ACPI_NUMA=y
147CONFIG_NUMA_EMU=y 255CONFIG_NODES_SPAN_OTHER_NODES=y
256# CONFIG_NUMA_EMU is not set
257CONFIG_NODES_SHIFT=6
258CONFIG_ARCH_SPARSEMEM_DEFAULT=y
259CONFIG_ARCH_SPARSEMEM_ENABLE=y
260CONFIG_ARCH_SELECT_MEMORY_MODEL=y
261CONFIG_SELECT_MEMORY_MODEL=y
262# CONFIG_FLATMEM_MANUAL is not set
263# CONFIG_DISCONTIGMEM_MANUAL is not set
264CONFIG_SPARSEMEM_MANUAL=y
265CONFIG_SPARSEMEM=y
148CONFIG_NEED_MULTIPLE_NODES=y 266CONFIG_NEED_MULTIPLE_NODES=y
267CONFIG_HAVE_MEMORY_PRESENT=y
149# CONFIG_SPARSEMEM_STATIC is not set 268# CONFIG_SPARSEMEM_STATIC is not set
269CONFIG_SPARSEMEM_EXTREME=y
270CONFIG_SPARSEMEM_VMEMMAP_ENABLE=y
271CONFIG_SPARSEMEM_VMEMMAP=y
272
273#
274# Memory hotplug is currently incompatible with Software Suspend
275#
276CONFIG_PAGEFLAGS_EXTENDED=y
150CONFIG_SPLIT_PTLOCK_CPUS=4 277CONFIG_SPLIT_PTLOCK_CPUS=4
151CONFIG_MIGRATION=y 278CONFIG_MIGRATION=y
152CONFIG_RESOURCES_64BIT=y 279CONFIG_RESOURCES_64BIT=y
153CONFIG_ZONE_DMA_FLAG=1 280CONFIG_ZONE_DMA_FLAG=1
154CONFIG_BOUNCE=y 281CONFIG_BOUNCE=y
155CONFIG_VIRT_TO_BUS=y 282CONFIG_VIRT_TO_BUS=y
156CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID=y 283CONFIG_MTRR=y
157CONFIG_OUT_OF_LINE_PFN_TO_PAGE=y 284# CONFIG_X86_PAT is not set
158CONFIG_NR_CPUS=32 285CONFIG_EFI=y
159CONFIG_PHYSICAL_ALIGN=0x200000
160CONFIG_HOTPLUG_CPU=y
161CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y
162CONFIG_HPET_TIMER=y
163CONFIG_HPET_EMULATE_RTC=y
164CONFIG_GART_IOMMU=y
165# CONFIG_CALGARY_IOMMU is not set
166CONFIG_SWIOTLB=y
167CONFIG_X86_MCE=y
168CONFIG_X86_MCE_INTEL=y
169CONFIG_X86_MCE_AMD=y
170# CONFIG_KEXEC is not set
171# CONFIG_CRASH_DUMP is not set
172# CONFIG_RELOCATABLE is not set
173CONFIG_PHYSICAL_START=0x200000
174CONFIG_SECCOMP=y 286CONFIG_SECCOMP=y
175# CONFIG_CC_STACKPROTECTOR is not set
176# CONFIG_HZ_100 is not set 287# CONFIG_HZ_100 is not set
177CONFIG_HZ_250=y 288# CONFIG_HZ_250 is not set
178# CONFIG_HZ_300 is not set 289# CONFIG_HZ_300 is not set
179# CONFIG_HZ_1000 is not set 290CONFIG_HZ_1000=y
180CONFIG_HZ=250 291CONFIG_HZ=1000
181CONFIG_K8_NB=y 292CONFIG_SCHED_HRTICK=y
182CONFIG_GENERIC_HARDIRQS=y 293CONFIG_KEXEC=y
183CONFIG_GENERIC_IRQ_PROBE=y 294CONFIG_CRASH_DUMP=y
184CONFIG_ISA_DMA_API=y 295CONFIG_PHYSICAL_START=0x1000000
185CONFIG_GENERIC_PENDING_IRQ=y 296CONFIG_RELOCATABLE=y
297CONFIG_PHYSICAL_ALIGN=0x200000
298CONFIG_HOTPLUG_CPU=y
299# CONFIG_COMPAT_VDSO is not set
300CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y
301CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID=y
186 302
187# 303#
188# Power management options 304# Power management options
189# 305#
306CONFIG_ARCH_HIBERNATION_HEADER=y
190CONFIG_PM=y 307CONFIG_PM=y
191# CONFIG_PM_LEGACY is not set 308CONFIG_PM_DEBUG=y
192# CONFIG_PM_DEBUG is not set 309# CONFIG_PM_VERBOSE is not set
310CONFIG_CAN_PM_TRACE=y
311CONFIG_PM_TRACE=y
312CONFIG_PM_TRACE_RTC=y
313CONFIG_PM_SLEEP_SMP=y
314CONFIG_PM_SLEEP=y
315CONFIG_SUSPEND=y
316CONFIG_SUSPEND_FREEZER=y
193CONFIG_HIBERNATION=y 317CONFIG_HIBERNATION=y
194CONFIG_PM_STD_PARTITION="" 318CONFIG_PM_STD_PARTITION=""
195
196#
197# ACPI (Advanced Configuration and Power Interface) Support
198#
199CONFIG_ACPI=y 319CONFIG_ACPI=y
200CONFIG_ACPI_SLEEP=y 320CONFIG_ACPI_SLEEP=y
201CONFIG_ACPI_SLEEP_PROC_FS=y
202CONFIG_ACPI_SLEEP_PROC_SLEEP=y
203CONFIG_ACPI_PROCFS=y 321CONFIG_ACPI_PROCFS=y
322CONFIG_ACPI_PROCFS_POWER=y
323CONFIG_ACPI_SYSFS_POWER=y
324CONFIG_ACPI_PROC_EVENT=y
204CONFIG_ACPI_AC=y 325CONFIG_ACPI_AC=y
205CONFIG_ACPI_BATTERY=y 326CONFIG_ACPI_BATTERY=y
206CONFIG_ACPI_BUTTON=y 327CONFIG_ACPI_BUTTON=y
207CONFIG_ACPI_FAN=y 328CONFIG_ACPI_FAN=y
208# CONFIG_ACPI_DOCK is not set 329CONFIG_ACPI_DOCK=y
330# CONFIG_ACPI_BAY is not set
209CONFIG_ACPI_PROCESSOR=y 331CONFIG_ACPI_PROCESSOR=y
210CONFIG_ACPI_HOTPLUG_CPU=y 332CONFIG_ACPI_HOTPLUG_CPU=y
211CONFIG_ACPI_THERMAL=y 333CONFIG_ACPI_THERMAL=y
212CONFIG_ACPI_NUMA=y 334CONFIG_ACPI_NUMA=y
335# CONFIG_ACPI_WMI is not set
213# CONFIG_ACPI_ASUS is not set 336# CONFIG_ACPI_ASUS is not set
214# CONFIG_ACPI_TOSHIBA is not set 337# CONFIG_ACPI_TOSHIBA is not set
338# CONFIG_ACPI_CUSTOM_DSDT is not set
215CONFIG_ACPI_BLACKLIST_YEAR=0 339CONFIG_ACPI_BLACKLIST_YEAR=0
216# CONFIG_ACPI_DEBUG is not set 340# CONFIG_ACPI_DEBUG is not set
217CONFIG_ACPI_EC=y 341CONFIG_ACPI_EC=y
@@ -227,29 +351,34 @@ CONFIG_ACPI_CONTAINER=y
227CONFIG_CPU_FREQ=y 351CONFIG_CPU_FREQ=y
228CONFIG_CPU_FREQ_TABLE=y 352CONFIG_CPU_FREQ_TABLE=y
229CONFIG_CPU_FREQ_DEBUG=y 353CONFIG_CPU_FREQ_DEBUG=y
230CONFIG_CPU_FREQ_STAT=y 354# CONFIG_CPU_FREQ_STAT is not set
231# CONFIG_CPU_FREQ_STAT_DETAILS is not set 355# CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE is not set
232CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE=y 356# CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE is not set
233# CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE is not set 357CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE=y
358# CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND is not set
359# CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE is not set
234CONFIG_CPU_FREQ_GOV_PERFORMANCE=y 360CONFIG_CPU_FREQ_GOV_PERFORMANCE=y
235# CONFIG_CPU_FREQ_GOV_POWERSAVE is not set 361# CONFIG_CPU_FREQ_GOV_POWERSAVE is not set
236CONFIG_CPU_FREQ_GOV_USERSPACE=y 362CONFIG_CPU_FREQ_GOV_USERSPACE=y
237CONFIG_CPU_FREQ_GOV_ONDEMAND=y 363CONFIG_CPU_FREQ_GOV_ONDEMAND=y
238CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y 364# CONFIG_CPU_FREQ_GOV_CONSERVATIVE is not set
239 365
240# 366#
241# CPUFreq processor drivers 367# CPUFreq processor drivers
242# 368#
243CONFIG_X86_POWERNOW_K8=y
244CONFIG_X86_POWERNOW_K8_ACPI=y
245# CONFIG_X86_SPEEDSTEP_CENTRINO is not set
246CONFIG_X86_ACPI_CPUFREQ=y 369CONFIG_X86_ACPI_CPUFREQ=y
370# CONFIG_X86_POWERNOW_K8 is not set
371# CONFIG_X86_SPEEDSTEP_CENTRINO is not set
372# CONFIG_X86_P4_CLOCKMOD is not set
247 373
248# 374#
249# shared options 375# shared options
250# 376#
251CONFIG_X86_ACPI_CPUFREQ_PROC_INTF=y 377# CONFIG_X86_ACPI_CPUFREQ_PROC_INTF is not set
252# CONFIG_X86_SPEEDSTEP_LIB is not set 378# CONFIG_X86_SPEEDSTEP_LIB is not set
379CONFIG_CPU_IDLE=y
380CONFIG_CPU_IDLE_GOV_LADDER=y
381CONFIG_CPU_IDLE_GOV_MENU=y
253 382
254# 383#
255# Bus options (PCI etc.) 384# Bus options (PCI etc.)
@@ -257,27 +386,56 @@ CONFIG_X86_ACPI_CPUFREQ_PROC_INTF=y
257CONFIG_PCI=y 386CONFIG_PCI=y
258CONFIG_PCI_DIRECT=y 387CONFIG_PCI_DIRECT=y
259CONFIG_PCI_MMCONFIG=y 388CONFIG_PCI_MMCONFIG=y
389CONFIG_PCI_DOMAINS=y
390CONFIG_DMAR=y
391CONFIG_DMAR_GFX_WA=y
392CONFIG_DMAR_FLOPPY_WA=y
260CONFIG_PCIEPORTBUS=y 393CONFIG_PCIEPORTBUS=y
394# CONFIG_HOTPLUG_PCI_PCIE is not set
261CONFIG_PCIEAER=y 395CONFIG_PCIEAER=y
396# CONFIG_PCIEASPM is not set
262CONFIG_ARCH_SUPPORTS_MSI=y 397CONFIG_ARCH_SUPPORTS_MSI=y
263CONFIG_PCI_MSI=y 398CONFIG_PCI_MSI=y
399# CONFIG_PCI_LEGACY is not set
264# CONFIG_PCI_DEBUG is not set 400# CONFIG_PCI_DEBUG is not set
265# CONFIG_HT_IRQ is not set 401CONFIG_HT_IRQ=y
266 402CONFIG_ISA_DMA_API=y
267# 403CONFIG_K8_NB=y
268# PCCARD (PCMCIA/CardBus) support 404CONFIG_PCCARD=y
269# 405# CONFIG_PCMCIA_DEBUG is not set
270# CONFIG_PCCARD is not set 406CONFIG_PCMCIA=y
271# CONFIG_HOTPLUG_PCI is not set 407CONFIG_PCMCIA_LOAD_CIS=y
408CONFIG_PCMCIA_IOCTL=y
409CONFIG_CARDBUS=y
410
411#
412# PC-card bridges
413#
414CONFIG_YENTA=y
415CONFIG_YENTA_O2=y
416CONFIG_YENTA_RICOH=y
417CONFIG_YENTA_TI=y
418CONFIG_YENTA_ENE_TUNE=y
419CONFIG_YENTA_TOSHIBA=y
420# CONFIG_PD6729 is not set
421# CONFIG_I82092 is not set
422CONFIG_PCCARD_NONSTATIC=y
423CONFIG_HOTPLUG_PCI=y
424# CONFIG_HOTPLUG_PCI_FAKE is not set
425# CONFIG_HOTPLUG_PCI_ACPI is not set
426# CONFIG_HOTPLUG_PCI_CPCI is not set
427# CONFIG_HOTPLUG_PCI_SHPC is not set
272 428
273# 429#
274# Executable file formats / Emulations 430# Executable file formats / Emulations
275# 431#
276CONFIG_BINFMT_ELF=y 432CONFIG_BINFMT_ELF=y
277# CONFIG_BINFMT_MISC is not set 433CONFIG_COMPAT_BINFMT_ELF=y
434CONFIG_BINFMT_MISC=y
278CONFIG_IA32_EMULATION=y 435CONFIG_IA32_EMULATION=y
279CONFIG_IA32_AOUT=y 436# CONFIG_IA32_AOUT is not set
280CONFIG_COMPAT=y 437CONFIG_COMPAT=y
438CONFIG_COMPAT_FOR_U64_ALIGNMENT=y
281CONFIG_SYSVIPC_COMPAT=y 439CONFIG_SYSVIPC_COMPAT=y
282 440
283# 441#
@@ -289,22 +447,31 @@ CONFIG_NET=y
289# Networking options 447# Networking options
290# 448#
291CONFIG_PACKET=y 449CONFIG_PACKET=y
292# CONFIG_PACKET_MMAP is not set 450CONFIG_PACKET_MMAP=y
293CONFIG_UNIX=y 451CONFIG_UNIX=y
452CONFIG_XFRM=y
453CONFIG_XFRM_USER=y
454# CONFIG_XFRM_SUB_POLICY is not set
455# CONFIG_XFRM_MIGRATE is not set
456# CONFIG_XFRM_STATISTICS is not set
294# CONFIG_NET_KEY is not set 457# CONFIG_NET_KEY is not set
295CONFIG_INET=y 458CONFIG_INET=y
296CONFIG_IP_MULTICAST=y 459CONFIG_IP_MULTICAST=y
297# CONFIG_IP_ADVANCED_ROUTER is not set 460CONFIG_IP_ADVANCED_ROUTER=y
461CONFIG_ASK_IP_FIB_HASH=y
462# CONFIG_IP_FIB_TRIE is not set
298CONFIG_IP_FIB_HASH=y 463CONFIG_IP_FIB_HASH=y
299CONFIG_IP_PNP=y 464CONFIG_IP_MULTIPLE_TABLES=y
300CONFIG_IP_PNP_DHCP=y 465CONFIG_IP_ROUTE_MULTIPATH=y
301# CONFIG_IP_PNP_BOOTP is not set 466CONFIG_IP_ROUTE_VERBOSE=y
302# CONFIG_IP_PNP_RARP is not set 467# CONFIG_IP_PNP is not set
303# CONFIG_NET_IPIP is not set 468# CONFIG_NET_IPIP is not set
304# CONFIG_NET_IPGRE is not set 469# CONFIG_NET_IPGRE is not set
305# CONFIG_IP_MROUTE is not set 470CONFIG_IP_MROUTE=y
471CONFIG_IP_PIMSM_V1=y
472CONFIG_IP_PIMSM_V2=y
306# CONFIG_ARPD is not set 473# CONFIG_ARPD is not set
307# CONFIG_SYN_COOKIES is not set 474CONFIG_SYN_COOKIES=y
308# CONFIG_INET_AH is not set 475# CONFIG_INET_AH is not set
309# CONFIG_INET_ESP is not set 476# CONFIG_INET_ESP is not set
310# CONFIG_INET_IPCOMP is not set 477# CONFIG_INET_IPCOMP is not set
@@ -313,31 +480,109 @@ CONFIG_INET_TUNNEL=y
313# CONFIG_INET_XFRM_MODE_TRANSPORT is not set 480# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
314# CONFIG_INET_XFRM_MODE_TUNNEL is not set 481# CONFIG_INET_XFRM_MODE_TUNNEL is not set
315# CONFIG_INET_XFRM_MODE_BEET is not set 482# CONFIG_INET_XFRM_MODE_BEET is not set
316CONFIG_INET_DIAG=y 483CONFIG_INET_LRO=y
317CONFIG_INET_TCP_DIAG=y 484# CONFIG_INET_DIAG is not set
318# CONFIG_TCP_CONG_ADVANCED is not set 485CONFIG_TCP_CONG_ADVANCED=y
486# CONFIG_TCP_CONG_BIC is not set
319CONFIG_TCP_CONG_CUBIC=y 487CONFIG_TCP_CONG_CUBIC=y
488# CONFIG_TCP_CONG_WESTWOOD is not set
489# CONFIG_TCP_CONG_HTCP is not set
490# CONFIG_TCP_CONG_HSTCP is not set
491# CONFIG_TCP_CONG_HYBLA is not set
492# CONFIG_TCP_CONG_VEGAS is not set
493# CONFIG_TCP_CONG_SCALABLE is not set
494# CONFIG_TCP_CONG_LP is not set
495# CONFIG_TCP_CONG_VENO is not set
496# CONFIG_TCP_CONG_YEAH is not set
497# CONFIG_TCP_CONG_ILLINOIS is not set
498# CONFIG_DEFAULT_BIC is not set
499CONFIG_DEFAULT_CUBIC=y
500# CONFIG_DEFAULT_HTCP is not set
501# CONFIG_DEFAULT_VEGAS is not set
502# CONFIG_DEFAULT_WESTWOOD is not set
503# CONFIG_DEFAULT_RENO is not set
320CONFIG_DEFAULT_TCP_CONG="cubic" 504CONFIG_DEFAULT_TCP_CONG="cubic"
321# CONFIG_TCP_MD5SIG is not set 505CONFIG_TCP_MD5SIG=y
506# CONFIG_IP_VS is not set
322CONFIG_IPV6=y 507CONFIG_IPV6=y
323# CONFIG_IPV6_PRIVACY is not set 508# CONFIG_IPV6_PRIVACY is not set
324# CONFIG_IPV6_ROUTER_PREF is not set 509# CONFIG_IPV6_ROUTER_PREF is not set
325# CONFIG_IPV6_OPTIMISTIC_DAD is not set 510# CONFIG_IPV6_OPTIMISTIC_DAD is not set
326# CONFIG_INET6_AH is not set 511CONFIG_INET6_AH=y
327# CONFIG_INET6_ESP is not set 512CONFIG_INET6_ESP=y
328# CONFIG_INET6_IPCOMP is not set 513# CONFIG_INET6_IPCOMP is not set
329# CONFIG_IPV6_MIP6 is not set 514# CONFIG_IPV6_MIP6 is not set
330# CONFIG_INET6_XFRM_TUNNEL is not set 515# CONFIG_INET6_XFRM_TUNNEL is not set
331# CONFIG_INET6_TUNNEL is not set 516# CONFIG_INET6_TUNNEL is not set
332# CONFIG_INET6_XFRM_MODE_TRANSPORT is not set 517CONFIG_INET6_XFRM_MODE_TRANSPORT=y
333# CONFIG_INET6_XFRM_MODE_TUNNEL is not set 518CONFIG_INET6_XFRM_MODE_TUNNEL=y
334# CONFIG_INET6_XFRM_MODE_BEET is not set 519CONFIG_INET6_XFRM_MODE_BEET=y
335# CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION is not set 520# CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION is not set
336CONFIG_IPV6_SIT=y 521CONFIG_IPV6_SIT=y
522CONFIG_IPV6_NDISC_NODETYPE=y
337# CONFIG_IPV6_TUNNEL is not set 523# CONFIG_IPV6_TUNNEL is not set
338# CONFIG_IPV6_MULTIPLE_TABLES is not set 524# CONFIG_IPV6_MULTIPLE_TABLES is not set
339# CONFIG_NETWORK_SECMARK is not set 525# CONFIG_IPV6_MROUTE is not set
340# CONFIG_NETFILTER is not set 526CONFIG_NETLABEL=y
527CONFIG_NETWORK_SECMARK=y
528CONFIG_NETFILTER=y
529# CONFIG_NETFILTER_DEBUG is not set
530# CONFIG_NETFILTER_ADVANCED is not set
531
532#
533# Core Netfilter Configuration
534#
535CONFIG_NETFILTER_NETLINK=y
536CONFIG_NETFILTER_NETLINK_LOG=y
537CONFIG_NF_CONNTRACK=y
538CONFIG_NF_CONNTRACK_SECMARK=y
539CONFIG_NF_CONNTRACK_FTP=y
540CONFIG_NF_CONNTRACK_IRC=y
541CONFIG_NF_CONNTRACK_SIP=y
542CONFIG_NF_CT_NETLINK=y
543CONFIG_NETFILTER_XTABLES=y
544CONFIG_NETFILTER_XT_TARGET_MARK=y
545CONFIG_NETFILTER_XT_TARGET_NFLOG=y
546CONFIG_NETFILTER_XT_TARGET_SECMARK=y
547CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=y
548CONFIG_NETFILTER_XT_TARGET_TCPMSS=y
549CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y
550CONFIG_NETFILTER_XT_MATCH_MARK=y
551CONFIG_NETFILTER_XT_MATCH_POLICY=y
552CONFIG_NETFILTER_XT_MATCH_STATE=y
553
554#
555# IP: Netfilter Configuration
556#
557CONFIG_NF_CONNTRACK_IPV4=y
558CONFIG_NF_CONNTRACK_PROC_COMPAT=y
559CONFIG_IP_NF_IPTABLES=y
560CONFIG_IP_NF_FILTER=y
561CONFIG_IP_NF_TARGET_REJECT=y
562CONFIG_IP_NF_TARGET_LOG=y
563CONFIG_IP_NF_TARGET_ULOG=y
564CONFIG_NF_NAT=y
565CONFIG_NF_NAT_NEEDED=y
566CONFIG_IP_NF_TARGET_MASQUERADE=y
567CONFIG_NF_NAT_FTP=y
568CONFIG_NF_NAT_IRC=y
569# CONFIG_NF_NAT_TFTP is not set
570# CONFIG_NF_NAT_AMANDA is not set
571# CONFIG_NF_NAT_PPTP is not set
572# CONFIG_NF_NAT_H323 is not set
573CONFIG_NF_NAT_SIP=y
574CONFIG_IP_NF_MANGLE=y
575
576#
577# IPv6: Netfilter Configuration
578#
579CONFIG_NF_CONNTRACK_IPV6=y
580CONFIG_IP6_NF_IPTABLES=y
581CONFIG_IP6_NF_MATCH_IPV6HEADER=y
582CONFIG_IP6_NF_FILTER=y
583CONFIG_IP6_NF_TARGET_LOG=y
584CONFIG_IP6_NF_TARGET_REJECT=y
585CONFIG_IP6_NF_MANGLE=y
341# CONFIG_IP_DCCP is not set 586# CONFIG_IP_DCCP is not set
342# CONFIG_IP_SCTP is not set 587# CONFIG_IP_SCTP is not set
343# CONFIG_TIPC is not set 588# CONFIG_TIPC is not set
@@ -345,6 +590,7 @@ CONFIG_IPV6_SIT=y
345# CONFIG_BRIDGE is not set 590# CONFIG_BRIDGE is not set
346# CONFIG_VLAN_8021Q is not set 591# CONFIG_VLAN_8021Q is not set
347# CONFIG_DECNET is not set 592# CONFIG_DECNET is not set
593CONFIG_LLC=y
348# CONFIG_LLC2 is not set 594# CONFIG_LLC2 is not set
349# CONFIG_IPX is not set 595# CONFIG_IPX is not set
350# CONFIG_ATALK is not set 596# CONFIG_ATALK is not set
@@ -352,28 +598,99 @@ CONFIG_IPV6_SIT=y
352# CONFIG_LAPB is not set 598# CONFIG_LAPB is not set
353# CONFIG_ECONET is not set 599# CONFIG_ECONET is not set
354# CONFIG_WAN_ROUTER is not set 600# CONFIG_WAN_ROUTER is not set
355 601CONFIG_NET_SCHED=y
356# 602
357# QoS and/or fair queueing 603#
358# 604# Queueing/Scheduling
359# CONFIG_NET_SCHED is not set 605#
606# CONFIG_NET_SCH_CBQ is not set
607# CONFIG_NET_SCH_HTB is not set
608# CONFIG_NET_SCH_HFSC is not set
609# CONFIG_NET_SCH_PRIO is not set
610# CONFIG_NET_SCH_RR is not set
611# CONFIG_NET_SCH_RED is not set
612# CONFIG_NET_SCH_SFQ is not set
613# CONFIG_NET_SCH_TEQL is not set
614# CONFIG_NET_SCH_TBF is not set
615# CONFIG_NET_SCH_GRED is not set
616# CONFIG_NET_SCH_DSMARK is not set
617# CONFIG_NET_SCH_NETEM is not set
618# CONFIG_NET_SCH_INGRESS is not set
619
620#
621# Classification
622#
623CONFIG_NET_CLS=y
624# CONFIG_NET_CLS_BASIC is not set
625# CONFIG_NET_CLS_TCINDEX is not set
626# CONFIG_NET_CLS_ROUTE4 is not set
627# CONFIG_NET_CLS_FW is not set
628# CONFIG_NET_CLS_U32 is not set
629# CONFIG_NET_CLS_RSVP is not set
630# CONFIG_NET_CLS_RSVP6 is not set
631# CONFIG_NET_CLS_FLOW is not set
632CONFIG_NET_EMATCH=y
633CONFIG_NET_EMATCH_STACK=32
634# CONFIG_NET_EMATCH_CMP is not set
635# CONFIG_NET_EMATCH_NBYTE is not set
636# CONFIG_NET_EMATCH_U32 is not set
637# CONFIG_NET_EMATCH_META is not set
638# CONFIG_NET_EMATCH_TEXT is not set
639CONFIG_NET_CLS_ACT=y
640# CONFIG_NET_ACT_POLICE is not set
641# CONFIG_NET_ACT_GACT is not set
642# CONFIG_NET_ACT_MIRRED is not set
643# CONFIG_NET_ACT_IPT is not set
644# CONFIG_NET_ACT_NAT is not set
645# CONFIG_NET_ACT_PEDIT is not set
646# CONFIG_NET_ACT_SIMP is not set
647CONFIG_NET_SCH_FIFO=y
360 648
361# 649#
362# Network testing 650# Network testing
363# 651#
364# CONFIG_NET_PKTGEN is not set 652# CONFIG_NET_PKTGEN is not set
365# CONFIG_NET_TCPPROBE is not set 653# CONFIG_NET_TCPPROBE is not set
366# CONFIG_HAMRADIO is not set 654CONFIG_HAMRADIO=y
655
656#
657# Packet Radio protocols
658#
659# CONFIG_AX25 is not set
660# CONFIG_CAN is not set
367# CONFIG_IRDA is not set 661# CONFIG_IRDA is not set
368# CONFIG_BT is not set 662# CONFIG_BT is not set
369# CONFIG_AF_RXRPC is not set 663# CONFIG_AF_RXRPC is not set
664CONFIG_FIB_RULES=y
370 665
371# 666#
372# Wireless 667# Wireless
373# 668#
374# CONFIG_CFG80211 is not set 669CONFIG_CFG80211=y
375# CONFIG_WIRELESS_EXT is not set 670CONFIG_NL80211=y
376# CONFIG_MAC80211 is not set 671CONFIG_WIRELESS_EXT=y
672CONFIG_MAC80211=y
673
674#
675# Rate control algorithm selection
676#
677CONFIG_MAC80211_RC_DEFAULT_PID=y
678# CONFIG_MAC80211_RC_DEFAULT_NONE is not set
679
680#
681# Selecting 'y' for an algorithm will
682#
683
684#
685# build the algorithm into mac80211.
686#
687CONFIG_MAC80211_RC_DEFAULT="pid"
688CONFIG_MAC80211_RC_PID=y
689# CONFIG_MAC80211_MESH is not set
690CONFIG_MAC80211_LEDS=y
691# CONFIG_MAC80211_DEBUGFS is not set
692# CONFIG_MAC80211_DEBUG_PACKET_ALIGNMENT is not set
693# CONFIG_MAC80211_DEBUG is not set
377# CONFIG_IEEE80211 is not set 694# CONFIG_IEEE80211 is not set
378# CONFIG_RFKILL is not set 695# CONFIG_RFKILL is not set
379# CONFIG_NET_9P is not set 696# CONFIG_NET_9P is not set
@@ -385,13 +702,15 @@ CONFIG_IPV6_SIT=y
385# 702#
386# Generic Driver Options 703# Generic Driver Options
387# 704#
705CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
388CONFIG_STANDALONE=y 706CONFIG_STANDALONE=y
389CONFIG_PREVENT_FIRMWARE_BUILD=y 707CONFIG_PREVENT_FIRMWARE_BUILD=y
390CONFIG_FW_LOADER=y 708CONFIG_FW_LOADER=y
391# CONFIG_DEBUG_DRIVER is not set 709# CONFIG_DEBUG_DRIVER is not set
392# CONFIG_DEBUG_DEVRES is not set 710CONFIG_DEBUG_DEVRES=y
393# CONFIG_SYS_HYPERVISOR is not set 711# CONFIG_SYS_HYPERVISOR is not set
394# CONFIG_CONNECTOR is not set 712CONFIG_CONNECTOR=y
713CONFIG_PROC_EVENTS=y
395# CONFIG_MTD is not set 714# CONFIG_MTD is not set
396# CONFIG_PARPORT is not set 715# CONFIG_PARPORT is not set
397CONFIG_PNP=y 716CONFIG_PNP=y
@@ -402,7 +721,7 @@ CONFIG_PNP=y
402# 721#
403CONFIG_PNPACPI=y 722CONFIG_PNPACPI=y
404CONFIG_BLK_DEV=y 723CONFIG_BLK_DEV=y
405CONFIG_BLK_DEV_FD=y 724# CONFIG_BLK_DEV_FD is not set
406# CONFIG_BLK_CPQ_DA is not set 725# CONFIG_BLK_CPQ_DA is not set
407# CONFIG_BLK_CPQ_CISS_DA is not set 726# CONFIG_BLK_CPQ_CISS_DA is not set
408# CONFIG_BLK_DEV_DAC960 is not set 727# CONFIG_BLK_DEV_DAC960 is not set
@@ -415,8 +734,8 @@ CONFIG_BLK_DEV_LOOP=y
415# CONFIG_BLK_DEV_UB is not set 734# CONFIG_BLK_DEV_UB is not set
416CONFIG_BLK_DEV_RAM=y 735CONFIG_BLK_DEV_RAM=y
417CONFIG_BLK_DEV_RAM_COUNT=16 736CONFIG_BLK_DEV_RAM_COUNT=16
418CONFIG_BLK_DEV_RAM_SIZE=4096 737CONFIG_BLK_DEV_RAM_SIZE=16384
419CONFIG_BLK_DEV_RAM_BLOCKSIZE=1024 738# CONFIG_BLK_DEV_XIP is not set
420# CONFIG_CDROM_PKTCDVD is not set 739# CONFIG_CDROM_PKTCDVD is not set
421# CONFIG_ATA_OVER_ETH is not set 740# CONFIG_ATA_OVER_ETH is not set
422CONFIG_MISC_DEVICES=y 741CONFIG_MISC_DEVICES=y
@@ -425,72 +744,16 @@ CONFIG_MISC_DEVICES=y
425# CONFIG_EEPROM_93CX6 is not set 744# CONFIG_EEPROM_93CX6 is not set
426# CONFIG_SGI_IOC4 is not set 745# CONFIG_SGI_IOC4 is not set
427# CONFIG_TIFM_CORE is not set 746# CONFIG_TIFM_CORE is not set
747# CONFIG_ACER_WMI is not set
748# CONFIG_ASUS_LAPTOP is not set
749# CONFIG_FUJITSU_LAPTOP is not set
750# CONFIG_MSI_LAPTOP is not set
428# CONFIG_SONY_LAPTOP is not set 751# CONFIG_SONY_LAPTOP is not set
429# CONFIG_THINKPAD_ACPI is not set 752# CONFIG_THINKPAD_ACPI is not set
430CONFIG_IDE=y 753# CONFIG_INTEL_MENLOW is not set
431CONFIG_BLK_DEV_IDE=y 754# CONFIG_ENCLOSURE_SERVICES is not set
432 755CONFIG_HAVE_IDE=y
433# 756# CONFIG_IDE is not set
434# Please see Documentation/ide.txt for help/info on IDE drives
435#
436# CONFIG_BLK_DEV_IDE_SATA is not set
437# CONFIG_BLK_DEV_HD_IDE is not set
438CONFIG_BLK_DEV_IDEDISK=y
439CONFIG_IDEDISK_MULTI_MODE=y
440CONFIG_BLK_DEV_IDECD=y
441# CONFIG_BLK_DEV_IDETAPE is not set
442# CONFIG_BLK_DEV_IDEFLOPPY is not set
443# CONFIG_BLK_DEV_IDESCSI is not set
444CONFIG_BLK_DEV_IDEACPI=y
445# CONFIG_IDE_TASK_IOCTL is not set
446CONFIG_IDE_PROC_FS=y
447
448#
449# IDE chipset support/bugfixes
450#
451CONFIG_IDE_GENERIC=y
452# CONFIG_BLK_DEV_CMD640 is not set
453# CONFIG_BLK_DEV_IDEPNP is not set
454CONFIG_BLK_DEV_IDEPCI=y
455# CONFIG_IDEPCI_SHARE_IRQ is not set
456CONFIG_IDEPCI_PCIBUS_ORDER=y
457# CONFIG_BLK_DEV_OFFBOARD is not set
458# CONFIG_BLK_DEV_GENERIC is not set
459# CONFIG_BLK_DEV_OPTI621 is not set
460# CONFIG_BLK_DEV_RZ1000 is not set
461CONFIG_BLK_DEV_IDEDMA_PCI=y
462# CONFIG_BLK_DEV_IDEDMA_FORCED is not set
463# CONFIG_IDEDMA_ONLYDISK is not set
464# CONFIG_BLK_DEV_AEC62XX is not set
465# CONFIG_BLK_DEV_ALI15X3 is not set
466CONFIG_BLK_DEV_AMD74XX=y
467CONFIG_BLK_DEV_ATIIXP=y
468# CONFIG_BLK_DEV_CMD64X is not set
469# CONFIG_BLK_DEV_TRIFLEX is not set
470# CONFIG_BLK_DEV_CY82C693 is not set
471# CONFIG_BLK_DEV_CS5520 is not set
472# CONFIG_BLK_DEV_CS5530 is not set
473# CONFIG_BLK_DEV_HPT34X is not set
474# CONFIG_BLK_DEV_HPT366 is not set
475# CONFIG_BLK_DEV_JMICRON is not set
476# CONFIG_BLK_DEV_SC1200 is not set
477CONFIG_BLK_DEV_PIIX=y
478# CONFIG_BLK_DEV_IT8213 is not set
479# CONFIG_BLK_DEV_IT821X is not set
480# CONFIG_BLK_DEV_NS87415 is not set
481# CONFIG_BLK_DEV_PDC202XX_OLD is not set
482CONFIG_BLK_DEV_PDC202XX_NEW=y
483# CONFIG_BLK_DEV_SVWKS is not set
484# CONFIG_BLK_DEV_SIIMAGE is not set
485# CONFIG_BLK_DEV_SIS5513 is not set
486# CONFIG_BLK_DEV_SLC90E66 is not set
487# CONFIG_BLK_DEV_TRM290 is not set
488# CONFIG_BLK_DEV_VIA82CXXX is not set
489# CONFIG_BLK_DEV_TC86C001 is not set
490# CONFIG_IDE_ARM is not set
491CONFIG_BLK_DEV_IDEDMA=y
492# CONFIG_IDEDMA_IVB is not set
493# CONFIG_BLK_DEV_HD is not set
494 757
495# 758#
496# SCSI device support 759# SCSI device support
@@ -499,8 +762,8 @@ CONFIG_BLK_DEV_IDEDMA=y
499CONFIG_SCSI=y 762CONFIG_SCSI=y
500CONFIG_SCSI_DMA=y 763CONFIG_SCSI_DMA=y
501# CONFIG_SCSI_TGT is not set 764# CONFIG_SCSI_TGT is not set
502CONFIG_SCSI_NETLINK=y 765# CONFIG_SCSI_NETLINK is not set
503# CONFIG_SCSI_PROC_FS is not set 766CONFIG_SCSI_PROC_FS=y
504 767
505# 768#
506# SCSI support type (disk, tape, CD-ROM) 769# SCSI support type (disk, tape, CD-ROM)
@@ -509,7 +772,7 @@ CONFIG_BLK_DEV_SD=y
509# CONFIG_CHR_DEV_ST is not set 772# CONFIG_CHR_DEV_ST is not set
510# CONFIG_CHR_DEV_OSST is not set 773# CONFIG_CHR_DEV_OSST is not set
511CONFIG_BLK_DEV_SR=y 774CONFIG_BLK_DEV_SR=y
512# CONFIG_BLK_DEV_SR_VENDOR is not set 775CONFIG_BLK_DEV_SR_VENDOR=y
513CONFIG_CHR_DEV_SG=y 776CONFIG_CHR_DEV_SG=y
514# CONFIG_CHR_DEV_SCH is not set 777# CONFIG_CHR_DEV_SCH is not set
515 778
@@ -526,73 +789,37 @@ CONFIG_SCSI_WAIT_SCAN=m
526# SCSI Transports 789# SCSI Transports
527# 790#
528CONFIG_SCSI_SPI_ATTRS=y 791CONFIG_SCSI_SPI_ATTRS=y
529CONFIG_SCSI_FC_ATTRS=y 792# CONFIG_SCSI_FC_ATTRS is not set
530# CONFIG_SCSI_ISCSI_ATTRS is not set 793# CONFIG_SCSI_ISCSI_ATTRS is not set
531CONFIG_SCSI_SAS_ATTRS=y 794# CONFIG_SCSI_SAS_ATTRS is not set
532# CONFIG_SCSI_SAS_LIBSAS is not set 795# CONFIG_SCSI_SAS_LIBSAS is not set
533 796# CONFIG_SCSI_SRP_ATTRS is not set
534# 797# CONFIG_SCSI_LOWLEVEL is not set
535# SCSI low-level drivers 798# CONFIG_SCSI_LOWLEVEL_PCMCIA is not set
536#
537# CONFIG_ISCSI_TCP is not set
538# CONFIG_BLK_DEV_3W_XXXX_RAID is not set
539# CONFIG_SCSI_3W_9XXX is not set
540# CONFIG_SCSI_ACARD is not set
541# CONFIG_SCSI_AACRAID is not set
542# CONFIG_SCSI_AIC7XXX is not set
543# CONFIG_SCSI_AIC7XXX_OLD is not set
544CONFIG_SCSI_AIC79XX=y
545CONFIG_AIC79XX_CMDS_PER_DEVICE=32
546CONFIG_AIC79XX_RESET_DELAY_MS=4000
547# CONFIG_AIC79XX_DEBUG_ENABLE is not set
548CONFIG_AIC79XX_DEBUG_MASK=0
549# CONFIG_AIC79XX_REG_PRETTY_PRINT is not set
550# CONFIG_SCSI_AIC94XX is not set
551# CONFIG_SCSI_ARCMSR is not set
552# CONFIG_MEGARAID_NEWGEN is not set
553# CONFIG_MEGARAID_LEGACY is not set
554# CONFIG_MEGARAID_SAS is not set
555# CONFIG_SCSI_HPTIOP is not set
556# CONFIG_SCSI_BUSLOGIC is not set
557# CONFIG_SCSI_DMX3191D is not set
558# CONFIG_SCSI_EATA is not set
559# CONFIG_SCSI_FUTURE_DOMAIN is not set
560# CONFIG_SCSI_GDTH is not set
561# CONFIG_SCSI_IPS is not set
562# CONFIG_SCSI_INITIO is not set
563# CONFIG_SCSI_INIA100 is not set
564# CONFIG_SCSI_STEX is not set
565# CONFIG_SCSI_SYM53C8XX_2 is not set
566# CONFIG_SCSI_IPR is not set
567# CONFIG_SCSI_QLOGIC_1280 is not set
568# CONFIG_SCSI_QLA_FC is not set
569# CONFIG_SCSI_QLA_ISCSI is not set
570# CONFIG_SCSI_LPFC is not set
571# CONFIG_SCSI_DC395x is not set
572# CONFIG_SCSI_DC390T is not set
573# CONFIG_SCSI_DEBUG is not set
574# CONFIG_SCSI_SRP is not set
575CONFIG_ATA=y 799CONFIG_ATA=y
576# CONFIG_ATA_NONSTANDARD is not set 800# CONFIG_ATA_NONSTANDARD is not set
577CONFIG_ATA_ACPI=y 801CONFIG_ATA_ACPI=y
802CONFIG_SATA_PMP=y
578CONFIG_SATA_AHCI=y 803CONFIG_SATA_AHCI=y
579CONFIG_SATA_SVW=y 804# CONFIG_SATA_SIL24 is not set
805CONFIG_ATA_SFF=y
806# CONFIG_SATA_SVW is not set
580CONFIG_ATA_PIIX=y 807CONFIG_ATA_PIIX=y
581# CONFIG_SATA_MV is not set 808# CONFIG_SATA_MV is not set
582CONFIG_SATA_NV=y 809# CONFIG_SATA_NV is not set
583# CONFIG_PDC_ADMA is not set 810# CONFIG_PDC_ADMA is not set
584# CONFIG_SATA_QSTOR is not set 811# CONFIG_SATA_QSTOR is not set
585# CONFIG_SATA_PROMISE is not set 812# CONFIG_SATA_PROMISE is not set
586# CONFIG_SATA_SX4 is not set 813# CONFIG_SATA_SX4 is not set
587CONFIG_SATA_SIL=y 814# CONFIG_SATA_SIL is not set
588# CONFIG_SATA_SIL24 is not set
589# CONFIG_SATA_SIS is not set 815# CONFIG_SATA_SIS is not set
590# CONFIG_SATA_ULI is not set 816# CONFIG_SATA_ULI is not set
591CONFIG_SATA_VIA=y 817# CONFIG_SATA_VIA is not set
592# CONFIG_SATA_VITESSE is not set 818# CONFIG_SATA_VITESSE is not set
593# CONFIG_SATA_INIC162X is not set 819# CONFIG_SATA_INIC162X is not set
820# CONFIG_PATA_ACPI is not set
594# CONFIG_PATA_ALI is not set 821# CONFIG_PATA_ALI is not set
595# CONFIG_PATA_AMD is not set 822CONFIG_PATA_AMD=y
596# CONFIG_PATA_ARTOP is not set 823# CONFIG_PATA_ARTOP is not set
597# CONFIG_PATA_ATIIXP is not set 824# CONFIG_PATA_ATIIXP is not set
598# CONFIG_PATA_CMD640_PCI is not set 825# CONFIG_PATA_CMD640_PCI is not set
@@ -612,11 +839,14 @@ CONFIG_SATA_VIA=y
612# CONFIG_PATA_TRIFLEX is not set 839# CONFIG_PATA_TRIFLEX is not set
613# CONFIG_PATA_MARVELL is not set 840# CONFIG_PATA_MARVELL is not set
614# CONFIG_PATA_MPIIX is not set 841# CONFIG_PATA_MPIIX is not set
615# CONFIG_PATA_OLDPIIX is not set 842CONFIG_PATA_OLDPIIX=y
616# CONFIG_PATA_NETCELL is not set 843# CONFIG_PATA_NETCELL is not set
844# CONFIG_PATA_NINJA32 is not set
617# CONFIG_PATA_NS87410 is not set 845# CONFIG_PATA_NS87410 is not set
846# CONFIG_PATA_NS87415 is not set
618# CONFIG_PATA_OPTI is not set 847# CONFIG_PATA_OPTI is not set
619# CONFIG_PATA_OPTIDMA is not set 848# CONFIG_PATA_OPTIDMA is not set
849# CONFIG_PATA_PCMCIA is not set
620# CONFIG_PATA_PDC_OLD is not set 850# CONFIG_PATA_PDC_OLD is not set
621# CONFIG_PATA_RADISYS is not set 851# CONFIG_PATA_RADISYS is not set
622# CONFIG_PATA_RZ1000 is not set 852# CONFIG_PATA_RZ1000 is not set
@@ -628,65 +858,42 @@ CONFIG_SATA_VIA=y
628# CONFIG_PATA_VIA is not set 858# CONFIG_PATA_VIA is not set
629# CONFIG_PATA_WINBOND is not set 859# CONFIG_PATA_WINBOND is not set
630CONFIG_MD=y 860CONFIG_MD=y
631# CONFIG_BLK_DEV_MD is not set 861CONFIG_BLK_DEV_MD=y
862# CONFIG_MD_LINEAR is not set
863# CONFIG_MD_RAID0 is not set
864# CONFIG_MD_RAID1 is not set
865# CONFIG_MD_RAID10 is not set
866# CONFIG_MD_RAID456 is not set
867# CONFIG_MD_MULTIPATH is not set
868# CONFIG_MD_FAULTY is not set
632CONFIG_BLK_DEV_DM=y 869CONFIG_BLK_DEV_DM=y
633# CONFIG_DM_DEBUG is not set 870# CONFIG_DM_DEBUG is not set
634# CONFIG_DM_CRYPT is not set 871# CONFIG_DM_CRYPT is not set
635# CONFIG_DM_SNAPSHOT is not set 872# CONFIG_DM_SNAPSHOT is not set
636# CONFIG_DM_MIRROR is not set 873CONFIG_DM_MIRROR=y
637# CONFIG_DM_ZERO is not set 874CONFIG_DM_ZERO=y
638# CONFIG_DM_MULTIPATH is not set 875# CONFIG_DM_MULTIPATH is not set
639# CONFIG_DM_DELAY is not set 876# CONFIG_DM_DELAY is not set
640 877# CONFIG_DM_UEVENT is not set
641# 878# CONFIG_FUSION is not set
642# Fusion MPT device support
643#
644CONFIG_FUSION=y
645CONFIG_FUSION_SPI=y
646# CONFIG_FUSION_FC is not set
647# CONFIG_FUSION_SAS is not set
648CONFIG_FUSION_MAX_SGE=128
649# CONFIG_FUSION_CTL is not set
650 879
651# 880#
652# IEEE 1394 (FireWire) support 881# IEEE 1394 (FireWire) support
653# 882#
654# CONFIG_FIREWIRE is not set 883# CONFIG_FIREWIRE is not set
655CONFIG_IEEE1394=y 884# CONFIG_IEEE1394 is not set
656
657#
658# Subsystem Options
659#
660# CONFIG_IEEE1394_VERBOSEDEBUG is not set
661
662#
663# Controllers
664#
665
666#
667# Texas Instruments PCILynx requires I2C
668#
669CONFIG_IEEE1394_OHCI1394=y
670
671#
672# Protocols
673#
674# CONFIG_IEEE1394_VIDEO1394 is not set
675# CONFIG_IEEE1394_SBP2 is not set
676# CONFIG_IEEE1394_ETH1394_ROM_ENTRY is not set
677# CONFIG_IEEE1394_ETH1394 is not set
678# CONFIG_IEEE1394_DV1394 is not set
679CONFIG_IEEE1394_RAWIO=y
680# CONFIG_I2O is not set 885# CONFIG_I2O is not set
681CONFIG_MACINTOSH_DRIVERS=y 886CONFIG_MACINTOSH_DRIVERS=y
682# CONFIG_MAC_EMUMOUSEBTN is not set 887CONFIG_MAC_EMUMOUSEBTN=y
683CONFIG_NETDEVICES=y 888CONFIG_NETDEVICES=y
684CONFIG_NETDEVICES_MULTIQUEUE=y 889# CONFIG_NETDEVICES_MULTIQUEUE is not set
890# CONFIG_IFB is not set
685# CONFIG_DUMMY is not set 891# CONFIG_DUMMY is not set
686# CONFIG_BONDING is not set 892# CONFIG_BONDING is not set
687# CONFIG_MACVLAN is not set 893# CONFIG_MACVLAN is not set
688# CONFIG_EQUALIZER is not set 894# CONFIG_EQUALIZER is not set
689CONFIG_TUN=y 895# CONFIG_TUN is not set
896# CONFIG_VETH is not set
690# CONFIG_NET_SB1000 is not set 897# CONFIG_NET_SB1000 is not set
691# CONFIG_ARCNET is not set 898# CONFIG_ARCNET is not set
692# CONFIG_PHYLIB is not set 899# CONFIG_PHYLIB is not set
@@ -696,39 +903,40 @@ CONFIG_MII=y
696# CONFIG_SUNGEM is not set 903# CONFIG_SUNGEM is not set
697# CONFIG_CASSINI is not set 904# CONFIG_CASSINI is not set
698CONFIG_NET_VENDOR_3COM=y 905CONFIG_NET_VENDOR_3COM=y
699CONFIG_VORTEX=y 906# CONFIG_VORTEX is not set
700# CONFIG_TYPHOON is not set 907# CONFIG_TYPHOON is not set
701CONFIG_NET_TULIP=y 908CONFIG_NET_TULIP=y
702# CONFIG_DE2104X is not set 909# CONFIG_DE2104X is not set
703CONFIG_TULIP=y 910# CONFIG_TULIP is not set
704# CONFIG_TULIP_MWI is not set
705# CONFIG_TULIP_MMIO is not set
706# CONFIG_TULIP_NAPI is not set
707# CONFIG_DE4X5 is not set 911# CONFIG_DE4X5 is not set
708# CONFIG_WINBOND_840 is not set 912# CONFIG_WINBOND_840 is not set
709# CONFIG_DM9102 is not set 913# CONFIG_DM9102 is not set
710# CONFIG_ULI526X is not set 914# CONFIG_ULI526X is not set
915# CONFIG_PCMCIA_XIRCOM is not set
711# CONFIG_HP100 is not set 916# CONFIG_HP100 is not set
917# CONFIG_IBM_NEW_EMAC_ZMII is not set
918# CONFIG_IBM_NEW_EMAC_RGMII is not set
919# CONFIG_IBM_NEW_EMAC_TAH is not set
920# CONFIG_IBM_NEW_EMAC_EMAC4 is not set
712CONFIG_NET_PCI=y 921CONFIG_NET_PCI=y
713# CONFIG_PCNET32 is not set 922# CONFIG_PCNET32 is not set
714CONFIG_AMD8111_ETH=y 923# CONFIG_AMD8111_ETH is not set
715# CONFIG_AMD8111E_NAPI is not set
716# CONFIG_ADAPTEC_STARFIRE is not set 924# CONFIG_ADAPTEC_STARFIRE is not set
717CONFIG_B44=y 925# CONFIG_B44 is not set
718CONFIG_FORCEDETH=y 926CONFIG_FORCEDETH=y
719# CONFIG_FORCEDETH_NAPI is not set 927# CONFIG_FORCEDETH_NAPI is not set
720# CONFIG_DGRS is not set
721# CONFIG_EEPRO100 is not set 928# CONFIG_EEPRO100 is not set
722CONFIG_E100=y 929CONFIG_E100=y
723# CONFIG_FEALNX is not set 930# CONFIG_FEALNX is not set
724# CONFIG_NATSEMI is not set 931# CONFIG_NATSEMI is not set
725# CONFIG_NE2K_PCI is not set 932# CONFIG_NE2K_PCI is not set
726CONFIG_8139CP=y 933# CONFIG_8139CP is not set
727CONFIG_8139TOO=y 934CONFIG_8139TOO=y
728# CONFIG_8139TOO_PIO is not set 935CONFIG_8139TOO_PIO=y
729# CONFIG_8139TOO_TUNE_TWISTER is not set 936# CONFIG_8139TOO_TUNE_TWISTER is not set
730# CONFIG_8139TOO_8129 is not set 937# CONFIG_8139TOO_8129 is not set
731# CONFIG_8139_OLD_RX_RESET is not set 938# CONFIG_8139_OLD_RX_RESET is not set
939# CONFIG_R6040 is not set
732# CONFIG_SIS900 is not set 940# CONFIG_SIS900 is not set
733# CONFIG_EPIC100 is not set 941# CONFIG_EPIC100 is not set
734# CONFIG_SUNDANCE is not set 942# CONFIG_SUNDANCE is not set
@@ -740,34 +948,74 @@ CONFIG_NETDEV_1000=y
740CONFIG_E1000=y 948CONFIG_E1000=y
741# CONFIG_E1000_NAPI is not set 949# CONFIG_E1000_NAPI is not set
742# CONFIG_E1000_DISABLE_PACKET_SPLIT is not set 950# CONFIG_E1000_DISABLE_PACKET_SPLIT is not set
951# CONFIG_E1000E is not set
952# CONFIG_E1000E_ENABLED is not set
953# CONFIG_IP1000 is not set
954# CONFIG_IGB is not set
743# CONFIG_NS83820 is not set 955# CONFIG_NS83820 is not set
744# CONFIG_HAMACHI is not set 956# CONFIG_HAMACHI is not set
745# CONFIG_YELLOWFIN is not set 957# CONFIG_YELLOWFIN is not set
746# CONFIG_R8169 is not set 958# CONFIG_R8169 is not set
747# CONFIG_SIS190 is not set 959# CONFIG_SIS190 is not set
748# CONFIG_SKGE is not set 960# CONFIG_SKGE is not set
749# CONFIG_SKY2 is not set 961CONFIG_SKY2=y
962# CONFIG_SKY2_DEBUG is not set
750# CONFIG_VIA_VELOCITY is not set 963# CONFIG_VIA_VELOCITY is not set
751CONFIG_TIGON3=y 964CONFIG_TIGON3=y
752CONFIG_BNX2=y 965# CONFIG_BNX2 is not set
753# CONFIG_QLA3XXX is not set 966# CONFIG_QLA3XXX is not set
754# CONFIG_ATL1 is not set 967# CONFIG_ATL1 is not set
755CONFIG_NETDEV_10000=y 968CONFIG_NETDEV_10000=y
756# CONFIG_CHELSIO_T1 is not set 969# CONFIG_CHELSIO_T1 is not set
757# CONFIG_CHELSIO_T3 is not set 970# CONFIG_CHELSIO_T3 is not set
971# CONFIG_IXGBE is not set
758# CONFIG_IXGB is not set 972# CONFIG_IXGB is not set
759CONFIG_S2IO=m 973# CONFIG_S2IO is not set
760# CONFIG_S2IO_NAPI is not set
761# CONFIG_MYRI10GE is not set 974# CONFIG_MYRI10GE is not set
762# CONFIG_NETXEN_NIC is not set 975# CONFIG_NETXEN_NIC is not set
976# CONFIG_NIU is not set
763# CONFIG_MLX4_CORE is not set 977# CONFIG_MLX4_CORE is not set
764# CONFIG_TR is not set 978# CONFIG_TEHUTI is not set
979# CONFIG_BNX2X is not set
980# CONFIG_SFC is not set
981CONFIG_TR=y
982# CONFIG_IBMOL is not set
983# CONFIG_3C359 is not set
984# CONFIG_TMS380TR is not set
765 985
766# 986#
767# Wireless LAN 987# Wireless LAN
768# 988#
769# CONFIG_WLAN_PRE80211 is not set 989# CONFIG_WLAN_PRE80211 is not set
770# CONFIG_WLAN_80211 is not set 990CONFIG_WLAN_80211=y
991# CONFIG_PCMCIA_RAYCS is not set
992# CONFIG_IPW2100 is not set
993# CONFIG_IPW2200 is not set
994# CONFIG_LIBERTAS is not set
995# CONFIG_AIRO is not set
996# CONFIG_HERMES is not set
997# CONFIG_ATMEL is not set
998# CONFIG_AIRO_CS is not set
999# CONFIG_PCMCIA_WL3501 is not set
1000# CONFIG_PRISM54 is not set
1001# CONFIG_USB_ZD1201 is not set
1002# CONFIG_USB_NET_RNDIS_WLAN is not set
1003# CONFIG_RTL8180 is not set
1004# CONFIG_RTL8187 is not set
1005# CONFIG_ADM8211 is not set
1006# CONFIG_P54_COMMON is not set
1007CONFIG_ATH5K=y
1008# CONFIG_ATH5K_DEBUG is not set
1009# CONFIG_IWLWIFI is not set
1010# CONFIG_IWLCORE is not set
1011# CONFIG_IWLWIFI_LEDS is not set
1012# CONFIG_IWL4965 is not set
1013# CONFIG_IWL3945 is not set
1014# CONFIG_HOSTAP is not set
1015# CONFIG_B43 is not set
1016# CONFIG_B43LEGACY is not set
1017# CONFIG_ZD1211RW is not set
1018# CONFIG_RT2X00 is not set
771 1019
772# 1020#
773# USB Network Adapters 1021# USB Network Adapters
@@ -776,16 +1024,26 @@ CONFIG_S2IO=m
776# CONFIG_USB_KAWETH is not set 1024# CONFIG_USB_KAWETH is not set
777# CONFIG_USB_PEGASUS is not set 1025# CONFIG_USB_PEGASUS is not set
778# CONFIG_USB_RTL8150 is not set 1026# CONFIG_USB_RTL8150 is not set
779# CONFIG_USB_USBNET_MII is not set
780# CONFIG_USB_USBNET is not set 1027# CONFIG_USB_USBNET is not set
1028CONFIG_NET_PCMCIA=y
1029# CONFIG_PCMCIA_3C589 is not set
1030# CONFIG_PCMCIA_3C574 is not set
1031# CONFIG_PCMCIA_FMVJ18X is not set
1032# CONFIG_PCMCIA_PCNET is not set
1033# CONFIG_PCMCIA_NMCLAN is not set
1034# CONFIG_PCMCIA_SMC91C92 is not set
1035# CONFIG_PCMCIA_XIRC2PS is not set
1036# CONFIG_PCMCIA_AXNET is not set
781# CONFIG_WAN is not set 1037# CONFIG_WAN is not set
782# CONFIG_FDDI is not set 1038CONFIG_FDDI=y
1039# CONFIG_DEFXX is not set
1040# CONFIG_SKFP is not set
783# CONFIG_HIPPI is not set 1041# CONFIG_HIPPI is not set
784# CONFIG_PPP is not set 1042# CONFIG_PPP is not set
785# CONFIG_SLIP is not set 1043# CONFIG_SLIP is not set
786# CONFIG_NET_FC is not set 1044# CONFIG_NET_FC is not set
787# CONFIG_SHAPER is not set
788CONFIG_NETCONSOLE=y 1045CONFIG_NETCONSOLE=y
1046# CONFIG_NETCONSOLE_DYNAMIC is not set
789CONFIG_NETPOLL=y 1047CONFIG_NETPOLL=y
790# CONFIG_NETPOLL_TRAP is not set 1048# CONFIG_NETPOLL_TRAP is not set
791CONFIG_NET_POLL_CONTROLLER=y 1049CONFIG_NET_POLL_CONTROLLER=y
@@ -796,18 +1054,17 @@ CONFIG_NET_POLL_CONTROLLER=y
796# Input device support 1054# Input device support
797# 1055#
798CONFIG_INPUT=y 1056CONFIG_INPUT=y
799# CONFIG_INPUT_FF_MEMLESS is not set 1057CONFIG_INPUT_FF_MEMLESS=y
800# CONFIG_INPUT_POLLDEV is not set 1058CONFIG_INPUT_POLLDEV=y
801 1059
802# 1060#
803# Userland interfaces 1061# Userland interfaces
804# 1062#
805CONFIG_INPUT_MOUSEDEV=y 1063CONFIG_INPUT_MOUSEDEV=y
806CONFIG_INPUT_MOUSEDEV_PSAUX=y 1064# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
807CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024 1065CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024
808CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 1066CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768
809# CONFIG_INPUT_JOYDEV is not set 1067# CONFIG_INPUT_JOYDEV is not set
810# CONFIG_INPUT_TSDEV is not set
811CONFIG_INPUT_EVDEV=y 1068CONFIG_INPUT_EVDEV=y
812# CONFIG_INPUT_EVBUG is not set 1069# CONFIG_INPUT_EVBUG is not set
813 1070
@@ -832,17 +1089,62 @@ CONFIG_MOUSE_PS2_TRACKPOINT=y
832# CONFIG_MOUSE_SERIAL is not set 1089# CONFIG_MOUSE_SERIAL is not set
833# CONFIG_MOUSE_APPLETOUCH is not set 1090# CONFIG_MOUSE_APPLETOUCH is not set
834# CONFIG_MOUSE_VSXXXAA is not set 1091# CONFIG_MOUSE_VSXXXAA is not set
835# CONFIG_INPUT_JOYSTICK is not set 1092CONFIG_INPUT_JOYSTICK=y
836# CONFIG_INPUT_TABLET is not set 1093# CONFIG_JOYSTICK_ANALOG is not set
837# CONFIG_INPUT_TOUCHSCREEN is not set 1094# CONFIG_JOYSTICK_A3D is not set
838# CONFIG_INPUT_MISC is not set 1095# CONFIG_JOYSTICK_ADI is not set
1096# CONFIG_JOYSTICK_COBRA is not set
1097# CONFIG_JOYSTICK_GF2K is not set
1098# CONFIG_JOYSTICK_GRIP is not set
1099# CONFIG_JOYSTICK_GRIP_MP is not set
1100# CONFIG_JOYSTICK_GUILLEMOT is not set
1101# CONFIG_JOYSTICK_INTERACT is not set
1102# CONFIG_JOYSTICK_SIDEWINDER is not set
1103# CONFIG_JOYSTICK_TMDC is not set
1104# CONFIG_JOYSTICK_IFORCE is not set
1105# CONFIG_JOYSTICK_WARRIOR is not set
1106# CONFIG_JOYSTICK_MAGELLAN is not set
1107# CONFIG_JOYSTICK_SPACEORB is not set
1108# CONFIG_JOYSTICK_SPACEBALL is not set
1109# CONFIG_JOYSTICK_STINGER is not set
1110# CONFIG_JOYSTICK_TWIDJOY is not set
1111# CONFIG_JOYSTICK_ZHENHUA is not set
1112# CONFIG_JOYSTICK_JOYDUMP is not set
1113# CONFIG_JOYSTICK_XPAD is not set
1114CONFIG_INPUT_TABLET=y
1115# CONFIG_TABLET_USB_ACECAD is not set
1116# CONFIG_TABLET_USB_AIPTEK is not set
1117# CONFIG_TABLET_USB_GTCO is not set
1118# CONFIG_TABLET_USB_KBTAB is not set
1119# CONFIG_TABLET_USB_WACOM is not set
1120CONFIG_INPUT_TOUCHSCREEN=y
1121# CONFIG_TOUCHSCREEN_FUJITSU is not set
1122# CONFIG_TOUCHSCREEN_GUNZE is not set
1123# CONFIG_TOUCHSCREEN_ELO is not set
1124# CONFIG_TOUCHSCREEN_MTOUCH is not set
1125# CONFIG_TOUCHSCREEN_MK712 is not set
1126# CONFIG_TOUCHSCREEN_PENMOUNT is not set
1127# CONFIG_TOUCHSCREEN_TOUCHRIGHT is not set
1128# CONFIG_TOUCHSCREEN_TOUCHWIN is not set
1129# CONFIG_TOUCHSCREEN_UCB1400 is not set
1130# CONFIG_TOUCHSCREEN_USB_COMPOSITE is not set
1131CONFIG_INPUT_MISC=y
1132# CONFIG_INPUT_PCSPKR is not set
1133# CONFIG_INPUT_APANEL is not set
1134# CONFIG_INPUT_ATLAS_BTNS is not set
1135# CONFIG_INPUT_ATI_REMOTE is not set
1136# CONFIG_INPUT_ATI_REMOTE2 is not set
1137# CONFIG_INPUT_KEYSPAN_REMOTE is not set
1138# CONFIG_INPUT_POWERMATE is not set
1139# CONFIG_INPUT_YEALINK is not set
1140# CONFIG_INPUT_UINPUT is not set
839 1141
840# 1142#
841# Hardware I/O ports 1143# Hardware I/O ports
842# 1144#
843CONFIG_SERIO=y 1145CONFIG_SERIO=y
844CONFIG_SERIO_I8042=y 1146CONFIG_SERIO_I8042=y
845# CONFIG_SERIO_SERPORT is not set 1147CONFIG_SERIO_SERPORT=y
846# CONFIG_SERIO_CT82C710 is not set 1148# CONFIG_SERIO_CT82C710 is not set
847# CONFIG_SERIO_PCIPS2 is not set 1149# CONFIG_SERIO_PCIPS2 is not set
848CONFIG_SERIO_LIBPS2=y 1150CONFIG_SERIO_LIBPS2=y
@@ -855,8 +1157,26 @@ CONFIG_SERIO_LIBPS2=y
855CONFIG_VT=y 1157CONFIG_VT=y
856CONFIG_VT_CONSOLE=y 1158CONFIG_VT_CONSOLE=y
857CONFIG_HW_CONSOLE=y 1159CONFIG_HW_CONSOLE=y
858# CONFIG_VT_HW_CONSOLE_BINDING is not set 1160CONFIG_VT_HW_CONSOLE_BINDING=y
859# CONFIG_SERIAL_NONSTANDARD is not set 1161CONFIG_DEVKMEM=y
1162CONFIG_SERIAL_NONSTANDARD=y
1163# CONFIG_COMPUTONE is not set
1164# CONFIG_ROCKETPORT is not set
1165# CONFIG_CYCLADES is not set
1166# CONFIG_DIGIEPCA is not set
1167# CONFIG_MOXA_INTELLIO is not set
1168# CONFIG_MOXA_SMARTIO is not set
1169# CONFIG_ISI is not set
1170# CONFIG_SYNCLINK is not set
1171# CONFIG_SYNCLINKMP is not set
1172# CONFIG_SYNCLINK_GT is not set
1173# CONFIG_N_HDLC is not set
1174# CONFIG_RISCOM8 is not set
1175# CONFIG_SPECIALIX is not set
1176# CONFIG_SX is not set
1177# CONFIG_RIO is not set
1178# CONFIG_STALDRV is not set
1179# CONFIG_NOZOMI is not set
860 1180
861# 1181#
862# Serial drivers 1182# Serial drivers
@@ -866,9 +1186,14 @@ CONFIG_SERIAL_8250_CONSOLE=y
866CONFIG_FIX_EARLYCON_MEM=y 1186CONFIG_FIX_EARLYCON_MEM=y
867CONFIG_SERIAL_8250_PCI=y 1187CONFIG_SERIAL_8250_PCI=y
868CONFIG_SERIAL_8250_PNP=y 1188CONFIG_SERIAL_8250_PNP=y
869CONFIG_SERIAL_8250_NR_UARTS=4 1189# CONFIG_SERIAL_8250_CS is not set
1190CONFIG_SERIAL_8250_NR_UARTS=32
870CONFIG_SERIAL_8250_RUNTIME_UARTS=4 1191CONFIG_SERIAL_8250_RUNTIME_UARTS=4
871# CONFIG_SERIAL_8250_EXTENDED is not set 1192CONFIG_SERIAL_8250_EXTENDED=y
1193CONFIG_SERIAL_8250_MANY_PORTS=y
1194CONFIG_SERIAL_8250_SHARE_IRQ=y
1195CONFIG_SERIAL_8250_DETECT_IRQ=y
1196CONFIG_SERIAL_8250_RSA=y
872 1197
873# 1198#
874# Non-8250 serial port support 1199# Non-8250 serial port support
@@ -877,78 +1202,260 @@ CONFIG_SERIAL_CORE=y
877CONFIG_SERIAL_CORE_CONSOLE=y 1202CONFIG_SERIAL_CORE_CONSOLE=y
878# CONFIG_SERIAL_JSM is not set 1203# CONFIG_SERIAL_JSM is not set
879CONFIG_UNIX98_PTYS=y 1204CONFIG_UNIX98_PTYS=y
880CONFIG_LEGACY_PTYS=y 1205# CONFIG_LEGACY_PTYS is not set
881CONFIG_LEGACY_PTY_COUNT=256
882# CONFIG_IPMI_HANDLER is not set 1206# CONFIG_IPMI_HANDLER is not set
883# CONFIG_WATCHDOG is not set
884CONFIG_HW_RANDOM=y 1207CONFIG_HW_RANDOM=y
885CONFIG_HW_RANDOM_INTEL=y 1208# CONFIG_HW_RANDOM_INTEL is not set
886CONFIG_HW_RANDOM_AMD=y 1209# CONFIG_HW_RANDOM_AMD is not set
887# CONFIG_NVRAM is not set 1210CONFIG_NVRAM=y
888CONFIG_RTC=y
889# CONFIG_R3964 is not set 1211# CONFIG_R3964 is not set
890# CONFIG_APPLICOM is not set 1212# CONFIG_APPLICOM is not set
891CONFIG_AGP=y 1213
892CONFIG_AGP_AMD64=y 1214#
893CONFIG_AGP_INTEL=y 1215# PCMCIA character devices
894# CONFIG_AGP_SIS is not set 1216#
895# CONFIG_AGP_VIA is not set 1217# CONFIG_SYNCLINK_CS is not set
896# CONFIG_DRM is not set 1218# CONFIG_CARDMAN_4000 is not set
1219# CONFIG_CARDMAN_4040 is not set
1220# CONFIG_IPWIRELESS is not set
897# CONFIG_MWAVE is not set 1221# CONFIG_MWAVE is not set
898# CONFIG_PC8736x_GPIO is not set 1222# CONFIG_PC8736x_GPIO is not set
899CONFIG_RAW_DRIVER=y 1223# CONFIG_RAW_DRIVER is not set
900CONFIG_MAX_RAW_DEVS=256
901CONFIG_HPET=y 1224CONFIG_HPET=y
902# CONFIG_HPET_RTC_IRQ is not set 1225# CONFIG_HPET_RTC_IRQ is not set
903CONFIG_HPET_MMAP=y 1226# CONFIG_HPET_MMAP is not set
904# CONFIG_HANGCHECK_TIMER is not set 1227# CONFIG_HANGCHECK_TIMER is not set
905# CONFIG_TCG_TPM is not set 1228# CONFIG_TCG_TPM is not set
906# CONFIG_TELCLOCK is not set 1229# CONFIG_TELCLOCK is not set
907CONFIG_DEVPORT=y 1230CONFIG_DEVPORT=y
908# CONFIG_I2C is not set 1231CONFIG_I2C=y
909 1232CONFIG_I2C_BOARDINFO=y
910# 1233# CONFIG_I2C_CHARDEV is not set
911# SPI support 1234
912# 1235#
1236# I2C Hardware Bus support
1237#
1238# CONFIG_I2C_ALI1535 is not set
1239# CONFIG_I2C_ALI1563 is not set
1240# CONFIG_I2C_ALI15X3 is not set
1241# CONFIG_I2C_AMD756 is not set
1242# CONFIG_I2C_AMD8111 is not set
1243CONFIG_I2C_I801=y
1244# CONFIG_I2C_I810 is not set
1245# CONFIG_I2C_PIIX4 is not set
1246# CONFIG_I2C_NFORCE2 is not set
1247# CONFIG_I2C_OCORES is not set
1248# CONFIG_I2C_PARPORT_LIGHT is not set
1249# CONFIG_I2C_PROSAVAGE is not set
1250# CONFIG_I2C_SAVAGE4 is not set
1251# CONFIG_I2C_SIMTEC is not set
1252# CONFIG_I2C_SIS5595 is not set
1253# CONFIG_I2C_SIS630 is not set
1254# CONFIG_I2C_SIS96X is not set
1255# CONFIG_I2C_TAOS_EVM is not set
1256# CONFIG_I2C_STUB is not set
1257# CONFIG_I2C_TINY_USB is not set
1258# CONFIG_I2C_VIA is not set
1259# CONFIG_I2C_VIAPRO is not set
1260# CONFIG_I2C_VOODOO3 is not set
1261# CONFIG_I2C_PCA_PLATFORM is not set
1262
1263#
1264# Miscellaneous I2C Chip support
1265#
1266# CONFIG_DS1682 is not set
1267# CONFIG_SENSORS_EEPROM is not set
1268# CONFIG_SENSORS_PCF8574 is not set
1269# CONFIG_PCF8575 is not set
1270# CONFIG_SENSORS_PCF8591 is not set
1271# CONFIG_SENSORS_MAX6875 is not set
1272# CONFIG_SENSORS_TSL2550 is not set
1273# CONFIG_I2C_DEBUG_CORE is not set
1274# CONFIG_I2C_DEBUG_ALGO is not set
1275# CONFIG_I2C_DEBUG_BUS is not set
1276# CONFIG_I2C_DEBUG_CHIP is not set
913# CONFIG_SPI is not set 1277# CONFIG_SPI is not set
914# CONFIG_SPI_MASTER is not set
915# CONFIG_W1 is not set 1278# CONFIG_W1 is not set
916# CONFIG_POWER_SUPPLY is not set 1279CONFIG_POWER_SUPPLY=y
1280# CONFIG_POWER_SUPPLY_DEBUG is not set
1281# CONFIG_PDA_POWER is not set
1282# CONFIG_BATTERY_DS2760 is not set
917# CONFIG_HWMON is not set 1283# CONFIG_HWMON is not set
1284CONFIG_THERMAL=y
1285CONFIG_WATCHDOG=y
1286# CONFIG_WATCHDOG_NOWAYOUT is not set
1287
1288#
1289# Watchdog Device Drivers
1290#
1291# CONFIG_SOFT_WATCHDOG is not set
1292# CONFIG_ACQUIRE_WDT is not set
1293# CONFIG_ADVANTECH_WDT is not set
1294# CONFIG_ALIM1535_WDT is not set
1295# CONFIG_ALIM7101_WDT is not set
1296# CONFIG_SC520_WDT is not set
1297# CONFIG_EUROTECH_WDT is not set
1298# CONFIG_IB700_WDT is not set
1299# CONFIG_IBMASR is not set
1300# CONFIG_WAFER_WDT is not set
1301# CONFIG_I6300ESB_WDT is not set
1302# CONFIG_ITCO_WDT is not set
1303# CONFIG_IT8712F_WDT is not set
1304# CONFIG_HP_WATCHDOG is not set
1305# CONFIG_SC1200_WDT is not set
1306# CONFIG_PC87413_WDT is not set
1307# CONFIG_60XX_WDT is not set
1308# CONFIG_SBC8360_WDT is not set
1309# CONFIG_CPU5_WDT is not set
1310# CONFIG_SMSC37B787_WDT is not set
1311# CONFIG_W83627HF_WDT is not set
1312# CONFIG_W83697HF_WDT is not set
1313# CONFIG_W83877F_WDT is not set
1314# CONFIG_W83977F_WDT is not set
1315# CONFIG_MACHZ_WDT is not set
1316# CONFIG_SBC_EPX_C3_WATCHDOG is not set
1317
1318#
1319# PCI-based Watchdog Cards
1320#
1321# CONFIG_PCIPCWATCHDOG is not set
1322# CONFIG_WDTPCI is not set
1323
1324#
1325# USB-based Watchdog Cards
1326#
1327# CONFIG_USBPCWATCHDOG is not set
1328
1329#
1330# Sonics Silicon Backplane
1331#
1332CONFIG_SSB_POSSIBLE=y
1333# CONFIG_SSB is not set
918 1334
919# 1335#
920# Multifunction device drivers 1336# Multifunction device drivers
921# 1337#
922# CONFIG_MFD_SM501 is not set 1338# CONFIG_MFD_SM501 is not set
1339# CONFIG_HTC_PASIC3 is not set
923 1340
924# 1341#
925# Multimedia devices 1342# Multimedia devices
926# 1343#
1344
1345#
1346# Multimedia core support
1347#
927# CONFIG_VIDEO_DEV is not set 1348# CONFIG_VIDEO_DEV is not set
928# CONFIG_DVB_CORE is not set 1349# CONFIG_DVB_CORE is not set
1350
1351#
1352# Multimedia drivers
1353#
929CONFIG_DAB=y 1354CONFIG_DAB=y
930# CONFIG_USB_DABUSB is not set 1355# CONFIG_USB_DABUSB is not set
931 1356
932# 1357#
933# Graphics support 1358# Graphics support
934# 1359#
935# CONFIG_BACKLIGHT_LCD_SUPPORT is not set 1360CONFIG_AGP=y
1361CONFIG_AGP_AMD64=y
1362CONFIG_AGP_INTEL=y
1363# CONFIG_AGP_SIS is not set
1364# CONFIG_AGP_VIA is not set
1365CONFIG_DRM=y
1366# CONFIG_DRM_TDFX is not set
1367# CONFIG_DRM_R128 is not set
1368# CONFIG_DRM_RADEON is not set
1369# CONFIG_DRM_I810 is not set
1370# CONFIG_DRM_I830 is not set
1371CONFIG_DRM_I915=y
1372# CONFIG_DRM_MGA is not set
1373# CONFIG_DRM_SIS is not set
1374# CONFIG_DRM_VIA is not set
1375# CONFIG_DRM_SAVAGE is not set
1376# CONFIG_VGASTATE is not set
1377# CONFIG_VIDEO_OUTPUT_CONTROL is not set
1378CONFIG_FB=y
1379# CONFIG_FIRMWARE_EDID is not set
1380# CONFIG_FB_DDC is not set
1381CONFIG_FB_CFB_FILLRECT=y
1382CONFIG_FB_CFB_COPYAREA=y
1383CONFIG_FB_CFB_IMAGEBLIT=y
1384# CONFIG_FB_CFB_REV_PIXELS_IN_BYTE is not set
1385# CONFIG_FB_SYS_FILLRECT is not set
1386# CONFIG_FB_SYS_COPYAREA is not set
1387# CONFIG_FB_SYS_IMAGEBLIT is not set
1388# CONFIG_FB_FOREIGN_ENDIAN is not set
1389# CONFIG_FB_SYS_FOPS is not set
1390CONFIG_FB_DEFERRED_IO=y
1391# CONFIG_FB_SVGALIB is not set
1392# CONFIG_FB_MACMODES is not set
1393# CONFIG_FB_BACKLIGHT is not set
1394CONFIG_FB_MODE_HELPERS=y
1395CONFIG_FB_TILEBLITTING=y
1396
1397#
1398# Frame buffer hardware drivers
1399#
1400# CONFIG_FB_CIRRUS is not set
1401# CONFIG_FB_PM2 is not set
1402# CONFIG_FB_CYBER2000 is not set
1403# CONFIG_FB_ARC is not set
1404# CONFIG_FB_ASILIANT is not set
1405# CONFIG_FB_IMSTT is not set
1406# CONFIG_FB_VGA16 is not set
1407# CONFIG_FB_UVESA is not set
1408# CONFIG_FB_VESA is not set
1409CONFIG_FB_EFI=y
1410# CONFIG_FB_IMAC is not set
1411# CONFIG_FB_N411 is not set
1412# CONFIG_FB_HGA is not set
1413# CONFIG_FB_S1D13XXX is not set
1414# CONFIG_FB_NVIDIA is not set
1415# CONFIG_FB_RIVA is not set
1416# CONFIG_FB_LE80578 is not set
1417# CONFIG_FB_INTEL is not set
1418# CONFIG_FB_MATROX is not set
1419# CONFIG_FB_RADEON is not set
1420# CONFIG_FB_ATY128 is not set
1421# CONFIG_FB_ATY is not set
1422# CONFIG_FB_S3 is not set
1423# CONFIG_FB_SAVAGE is not set
1424# CONFIG_FB_SIS is not set
1425# CONFIG_FB_NEOMAGIC is not set
1426# CONFIG_FB_KYRO is not set
1427# CONFIG_FB_3DFX is not set
1428# CONFIG_FB_VOODOO1 is not set
1429# CONFIG_FB_VT8623 is not set
1430# CONFIG_FB_TRIDENT is not set
1431# CONFIG_FB_ARK is not set
1432# CONFIG_FB_PM3 is not set
1433# CONFIG_FB_GEODE is not set
1434# CONFIG_FB_VIRTUAL is not set
1435CONFIG_BACKLIGHT_LCD_SUPPORT=y
1436# CONFIG_LCD_CLASS_DEVICE is not set
1437CONFIG_BACKLIGHT_CLASS_DEVICE=y
1438# CONFIG_BACKLIGHT_CORGI is not set
1439# CONFIG_BACKLIGHT_PROGEAR is not set
936 1440
937# 1441#
938# Display device support 1442# Display device support
939# 1443#
940# CONFIG_DISPLAY_SUPPORT is not set 1444# CONFIG_DISPLAY_SUPPORT is not set
941# CONFIG_VGASTATE is not set
942# CONFIG_FB is not set
943 1445
944# 1446#
945# Console display driver support 1447# Console display driver support
946# 1448#
947CONFIG_VGA_CONSOLE=y 1449CONFIG_VGA_CONSOLE=y
948CONFIG_VGACON_SOFT_SCROLLBACK=y 1450CONFIG_VGACON_SOFT_SCROLLBACK=y
949CONFIG_VGACON_SOFT_SCROLLBACK_SIZE=256 1451CONFIG_VGACON_SOFT_SCROLLBACK_SIZE=64
950CONFIG_VIDEO_SELECT=y 1452CONFIG_VIDEO_SELECT=y
951CONFIG_DUMMY_CONSOLE=y 1453CONFIG_DUMMY_CONSOLE=y
1454# CONFIG_FRAMEBUFFER_CONSOLE is not set
1455CONFIG_LOGO=y
1456# CONFIG_LOGO_LINUX_MONO is not set
1457# CONFIG_LOGO_LINUX_VGA16 is not set
1458CONFIG_LOGO_LINUX_CLUT224=y
952 1459
953# 1460#
954# Sound 1461# Sound
@@ -958,33 +1465,165 @@ CONFIG_SOUND=y
958# 1465#
959# Advanced Linux Sound Architecture 1466# Advanced Linux Sound Architecture
960# 1467#
961# CONFIG_SND is not set 1468CONFIG_SND=y
1469CONFIG_SND_TIMER=y
1470CONFIG_SND_PCM=y
1471CONFIG_SND_HWDEP=y
1472CONFIG_SND_SEQUENCER=y
1473CONFIG_SND_SEQ_DUMMY=y
1474CONFIG_SND_OSSEMUL=y
1475CONFIG_SND_MIXER_OSS=y
1476CONFIG_SND_PCM_OSS=y
1477CONFIG_SND_PCM_OSS_PLUGINS=y
1478CONFIG_SND_SEQUENCER_OSS=y
1479CONFIG_SND_DYNAMIC_MINORS=y
1480CONFIG_SND_SUPPORT_OLD_API=y
1481CONFIG_SND_VERBOSE_PROCFS=y
1482# CONFIG_SND_VERBOSE_PRINTK is not set
1483# CONFIG_SND_DEBUG is not set
1484CONFIG_SND_VMASTER=y
1485
1486#
1487# Generic devices
1488#
1489# CONFIG_SND_PCSP is not set
1490# CONFIG_SND_DUMMY is not set
1491# CONFIG_SND_VIRMIDI is not set
1492# CONFIG_SND_MTPAV is not set
1493# CONFIG_SND_SERIAL_U16550 is not set
1494# CONFIG_SND_MPU401 is not set
1495
1496#
1497# PCI devices
1498#
1499# CONFIG_SND_AD1889 is not set
1500# CONFIG_SND_ALS300 is not set
1501# CONFIG_SND_ALS4000 is not set
1502# CONFIG_SND_ALI5451 is not set
1503# CONFIG_SND_ATIIXP is not set
1504# CONFIG_SND_ATIIXP_MODEM is not set
1505# CONFIG_SND_AU8810 is not set
1506# CONFIG_SND_AU8820 is not set
1507# CONFIG_SND_AU8830 is not set
1508# CONFIG_SND_AW2 is not set
1509# CONFIG_SND_AZT3328 is not set
1510# CONFIG_SND_BT87X is not set
1511# CONFIG_SND_CA0106 is not set
1512# CONFIG_SND_CMIPCI is not set
1513# CONFIG_SND_OXYGEN is not set
1514# CONFIG_SND_CS4281 is not set
1515# CONFIG_SND_CS46XX is not set
1516# CONFIG_SND_CS5530 is not set
1517# CONFIG_SND_DARLA20 is not set
1518# CONFIG_SND_GINA20 is not set
1519# CONFIG_SND_LAYLA20 is not set
1520# CONFIG_SND_DARLA24 is not set
1521# CONFIG_SND_GINA24 is not set
1522# CONFIG_SND_LAYLA24 is not set
1523# CONFIG_SND_MONA is not set
1524# CONFIG_SND_MIA is not set
1525# CONFIG_SND_ECHO3G is not set
1526# CONFIG_SND_INDIGO is not set
1527# CONFIG_SND_INDIGOIO is not set
1528# CONFIG_SND_INDIGODJ is not set
1529# CONFIG_SND_EMU10K1 is not set
1530# CONFIG_SND_EMU10K1X is not set
1531# CONFIG_SND_ENS1370 is not set
1532# CONFIG_SND_ENS1371 is not set
1533# CONFIG_SND_ES1938 is not set
1534# CONFIG_SND_ES1968 is not set
1535# CONFIG_SND_FM801 is not set
1536CONFIG_SND_HDA_INTEL=y
1537CONFIG_SND_HDA_HWDEP=y
1538CONFIG_SND_HDA_CODEC_REALTEK=y
1539CONFIG_SND_HDA_CODEC_ANALOG=y
1540CONFIG_SND_HDA_CODEC_SIGMATEL=y
1541CONFIG_SND_HDA_CODEC_VIA=y
1542CONFIG_SND_HDA_CODEC_ATIHDMI=y
1543CONFIG_SND_HDA_CODEC_CONEXANT=y
1544CONFIG_SND_HDA_CODEC_CMEDIA=y
1545CONFIG_SND_HDA_CODEC_SI3054=y
1546CONFIG_SND_HDA_GENERIC=y
1547# CONFIG_SND_HDA_POWER_SAVE is not set
1548# CONFIG_SND_HDSP is not set
1549# CONFIG_SND_HDSPM is not set
1550# CONFIG_SND_HIFIER is not set
1551# CONFIG_SND_ICE1712 is not set
1552# CONFIG_SND_ICE1724 is not set
1553# CONFIG_SND_INTEL8X0 is not set
1554# CONFIG_SND_INTEL8X0M is not set
1555# CONFIG_SND_KORG1212 is not set
1556# CONFIG_SND_MAESTRO3 is not set
1557# CONFIG_SND_MIXART is not set
1558# CONFIG_SND_NM256 is not set
1559# CONFIG_SND_PCXHR is not set
1560# CONFIG_SND_RIPTIDE is not set
1561# CONFIG_SND_RME32 is not set
1562# CONFIG_SND_RME96 is not set
1563# CONFIG_SND_RME9652 is not set
1564# CONFIG_SND_SONICVIBES is not set
1565# CONFIG_SND_TRIDENT is not set
1566# CONFIG_SND_VIA82XX is not set
1567# CONFIG_SND_VIA82XX_MODEM is not set
1568# CONFIG_SND_VIRTUOSO is not set
1569# CONFIG_SND_VX222 is not set
1570# CONFIG_SND_YMFPCI is not set
1571
1572#
1573# USB devices
1574#
1575# CONFIG_SND_USB_AUDIO is not set
1576# CONFIG_SND_USB_USX2Y is not set
1577# CONFIG_SND_USB_CAIAQ is not set
1578
1579#
1580# PCMCIA devices
1581#
1582# CONFIG_SND_VXPOCKET is not set
1583# CONFIG_SND_PDAUDIOCF is not set
1584
1585#
1586# System on Chip audio support
1587#
1588# CONFIG_SND_SOC is not set
1589
1590#
1591# ALSA SoC audio for Freescale SOCs
1592#
1593
1594#
1595# SoC Audio for the Texas Instruments OMAP
1596#
962 1597
963# 1598#
964# Open Sound System 1599# Open Sound System
965# 1600#
966CONFIG_SOUND_PRIME=y 1601# CONFIG_SOUND_PRIME is not set
967# CONFIG_SOUND_TRIDENT is not set
968# CONFIG_SOUND_MSNDCLAS is not set
969# CONFIG_SOUND_MSNDPIN is not set
970# CONFIG_SOUND_OSS is not set
971CONFIG_HID_SUPPORT=y 1602CONFIG_HID_SUPPORT=y
972CONFIG_HID=y 1603CONFIG_HID=y
973# CONFIG_HID_DEBUG is not set 1604CONFIG_HID_DEBUG=y
1605CONFIG_HIDRAW=y
974 1606
975# 1607#
976# USB Input Devices 1608# USB Input Devices
977# 1609#
978CONFIG_USB_HID=y 1610CONFIG_USB_HID=y
979# CONFIG_USB_HIDINPUT_POWERBOOK is not set 1611CONFIG_USB_HIDINPUT_POWERBOOK=y
980# CONFIG_HID_FF is not set 1612CONFIG_HID_FF=y
981# CONFIG_USB_HIDDEV is not set 1613CONFIG_HID_PID=y
1614CONFIG_LOGITECH_FF=y
1615# CONFIG_LOGIRUMBLEPAD2_FF is not set
1616CONFIG_PANTHERLORD_FF=y
1617CONFIG_THRUSTMASTER_FF=y
1618CONFIG_ZEROPLUS_FF=y
1619CONFIG_USB_HIDDEV=y
982CONFIG_USB_SUPPORT=y 1620CONFIG_USB_SUPPORT=y
983CONFIG_USB_ARCH_HAS_HCD=y 1621CONFIG_USB_ARCH_HAS_HCD=y
984CONFIG_USB_ARCH_HAS_OHCI=y 1622CONFIG_USB_ARCH_HAS_OHCI=y
985CONFIG_USB_ARCH_HAS_EHCI=y 1623CONFIG_USB_ARCH_HAS_EHCI=y
986CONFIG_USB=y 1624CONFIG_USB=y
987# CONFIG_USB_DEBUG is not set 1625CONFIG_USB_DEBUG=y
1626CONFIG_USB_ANNOUNCE_NEW_DEVICES=y
988 1627
989# 1628#
990# Miscellaneous USB options 1629# Miscellaneous USB options
@@ -992,18 +1631,18 @@ CONFIG_USB=y
992CONFIG_USB_DEVICEFS=y 1631CONFIG_USB_DEVICEFS=y
993# CONFIG_USB_DEVICE_CLASS is not set 1632# CONFIG_USB_DEVICE_CLASS is not set
994# CONFIG_USB_DYNAMIC_MINORS is not set 1633# CONFIG_USB_DYNAMIC_MINORS is not set
995# CONFIG_USB_SUSPEND is not set 1634CONFIG_USB_SUSPEND=y
996# CONFIG_USB_PERSIST is not set
997# CONFIG_USB_OTG is not set 1635# CONFIG_USB_OTG is not set
998 1636
999# 1637#
1000# USB Host Controller Drivers 1638# USB Host Controller Drivers
1001# 1639#
1640# CONFIG_USB_C67X00_HCD is not set
1002CONFIG_USB_EHCI_HCD=y 1641CONFIG_USB_EHCI_HCD=y
1003# CONFIG_USB_EHCI_SPLIT_ISO is not set
1004# CONFIG_USB_EHCI_ROOT_HUB_TT is not set 1642# CONFIG_USB_EHCI_ROOT_HUB_TT is not set
1005# CONFIG_USB_EHCI_TT_NEWSCHED is not set 1643# CONFIG_USB_EHCI_TT_NEWSCHED is not set
1006# CONFIG_USB_ISP116X_HCD is not set 1644# CONFIG_USB_ISP116X_HCD is not set
1645# CONFIG_USB_ISP1760_HCD is not set
1007CONFIG_USB_OHCI_HCD=y 1646CONFIG_USB_OHCI_HCD=y
1008# CONFIG_USB_OHCI_BIG_ENDIAN_DESC is not set 1647# CONFIG_USB_OHCI_BIG_ENDIAN_DESC is not set
1009# CONFIG_USB_OHCI_BIG_ENDIAN_MMIO is not set 1648# CONFIG_USB_OHCI_BIG_ENDIAN_MMIO is not set
@@ -1036,8 +1675,10 @@ CONFIG_USB_STORAGE=y
1036# CONFIG_USB_STORAGE_SDDR55 is not set 1675# CONFIG_USB_STORAGE_SDDR55 is not set
1037# CONFIG_USB_STORAGE_JUMPSHOT is not set 1676# CONFIG_USB_STORAGE_JUMPSHOT is not set
1038# CONFIG_USB_STORAGE_ALAUDA is not set 1677# CONFIG_USB_STORAGE_ALAUDA is not set
1678# CONFIG_USB_STORAGE_ONETOUCH is not set
1039# CONFIG_USB_STORAGE_KARMA is not set 1679# CONFIG_USB_STORAGE_KARMA is not set
1040# CONFIG_USB_LIBUSUAL is not set 1680# CONFIG_USB_STORAGE_CYPRESS_ATACB is not set
1681CONFIG_USB_LIBUSUAL=y
1041 1682
1042# 1683#
1043# USB Imaging devices 1684# USB Imaging devices
@@ -1049,10 +1690,6 @@ CONFIG_USB_MON=y
1049# 1690#
1050# USB port drivers 1691# USB port drivers
1051# 1692#
1052
1053#
1054# USB Serial Converter support
1055#
1056# CONFIG_USB_SERIAL is not set 1693# CONFIG_USB_SERIAL is not set
1057 1694
1058# 1695#
@@ -1078,98 +1715,126 @@ CONFIG_USB_MON=y
1078# CONFIG_USB_TRANCEVIBRATOR is not set 1715# CONFIG_USB_TRANCEVIBRATOR is not set
1079# CONFIG_USB_IOWARRIOR is not set 1716# CONFIG_USB_IOWARRIOR is not set
1080# CONFIG_USB_TEST is not set 1717# CONFIG_USB_TEST is not set
1718# CONFIG_USB_GADGET is not set
1719# CONFIG_MMC is not set
1720# CONFIG_MEMSTICK is not set
1721CONFIG_NEW_LEDS=y
1722CONFIG_LEDS_CLASS=y
1081 1723
1082# 1724#
1083# USB DSL modem support 1725# LED drivers
1084# 1726#
1727# CONFIG_LEDS_CLEVO_MAIL is not set
1085 1728
1086# 1729#
1087# USB Gadget Support 1730# LED Triggers
1088# 1731#
1089# CONFIG_USB_GADGET is not set 1732CONFIG_LEDS_TRIGGERS=y
1090# CONFIG_MMC is not set 1733# CONFIG_LEDS_TRIGGER_TIMER is not set
1734# CONFIG_LEDS_TRIGGER_HEARTBEAT is not set
1735# CONFIG_LEDS_TRIGGER_DEFAULT_ON is not set
1736# CONFIG_ACCESSIBILITY is not set
1737# CONFIG_INFINIBAND is not set
1738CONFIG_EDAC=y
1091 1739
1092# 1740#
1093# LED devices 1741# Reporting subsystems
1094# 1742#
1095# CONFIG_NEW_LEDS is not set 1743# CONFIG_EDAC_DEBUG is not set
1744# CONFIG_EDAC_MM_EDAC is not set
1745CONFIG_RTC_LIB=y
1746CONFIG_RTC_CLASS=y
1747# CONFIG_RTC_HCTOSYS is not set
1748# CONFIG_RTC_DEBUG is not set
1096 1749
1097# 1750#
1098# LED drivers 1751# RTC interfaces
1099# 1752#
1753CONFIG_RTC_INTF_SYSFS=y
1754CONFIG_RTC_INTF_PROC=y
1755CONFIG_RTC_INTF_DEV=y
1756# CONFIG_RTC_INTF_DEV_UIE_EMUL is not set
1757# CONFIG_RTC_DRV_TEST is not set
1100 1758
1101# 1759#
1102# LED Triggers 1760# I2C RTC drivers
1103# 1761#
1104# CONFIG_INFINIBAND is not set 1762# CONFIG_RTC_DRV_DS1307 is not set
1105# CONFIG_EDAC is not set 1763# CONFIG_RTC_DRV_DS1374 is not set
1764# CONFIG_RTC_DRV_DS1672 is not set
1765# CONFIG_RTC_DRV_MAX6900 is not set
1766# CONFIG_RTC_DRV_RS5C372 is not set
1767# CONFIG_RTC_DRV_ISL1208 is not set
1768# CONFIG_RTC_DRV_X1205 is not set
1769# CONFIG_RTC_DRV_PCF8563 is not set
1770# CONFIG_RTC_DRV_PCF8583 is not set
1771# CONFIG_RTC_DRV_M41T80 is not set
1772# CONFIG_RTC_DRV_S35390A is not set
1106 1773
1107# 1774#
1108# Real Time Clock 1775# SPI RTC drivers
1109# 1776#
1110# CONFIG_RTC_CLASS is not set
1111 1777
1112# 1778#
1113# DMA Engine support 1779# Platform RTC drivers
1114# 1780#
1115# CONFIG_DMA_ENGINE is not set 1781CONFIG_RTC_DRV_CMOS=y
1782# CONFIG_RTC_DRV_DS1511 is not set
1783# CONFIG_RTC_DRV_DS1553 is not set
1784# CONFIG_RTC_DRV_DS1742 is not set
1785# CONFIG_RTC_DRV_STK17TA8 is not set
1786# CONFIG_RTC_DRV_M48T86 is not set
1787# CONFIG_RTC_DRV_M48T59 is not set
1788# CONFIG_RTC_DRV_V3020 is not set
1116 1789
1117# 1790#
1118# DMA Clients 1791# on-CPU RTC drivers
1119# 1792#
1793CONFIG_DMADEVICES=y
1120 1794
1121# 1795#
1122# DMA Devices 1796# DMA Devices
1123# 1797#
1124CONFIG_VIRTUALIZATION=y 1798# CONFIG_INTEL_IOATDMA is not set
1125# CONFIG_KVM is not set
1126
1127#
1128# Userspace I/O
1129#
1130# CONFIG_UIO is not set 1799# CONFIG_UIO is not set
1131 1800
1132# 1801#
1133# Firmware Drivers 1802# Firmware Drivers
1134# 1803#
1135# CONFIG_EDD is not set 1804# CONFIG_EDD is not set
1805CONFIG_EFI_VARS=y
1136# CONFIG_DELL_RBU is not set 1806# CONFIG_DELL_RBU is not set
1137# CONFIG_DCDBAS is not set 1807# CONFIG_DCDBAS is not set
1138CONFIG_DMIID=y 1808CONFIG_DMIID=y
1809# CONFIG_ISCSI_IBFT_FIND is not set
1139 1810
1140# 1811#
1141# File systems 1812# File systems
1142# 1813#
1143CONFIG_EXT2_FS=y 1814# CONFIG_EXT2_FS is not set
1144CONFIG_EXT2_FS_XATTR=y
1145CONFIG_EXT2_FS_POSIX_ACL=y
1146# CONFIG_EXT2_FS_SECURITY is not set
1147# CONFIG_EXT2_FS_XIP is not set
1148CONFIG_EXT3_FS=y 1815CONFIG_EXT3_FS=y
1149CONFIG_EXT3_FS_XATTR=y 1816CONFIG_EXT3_FS_XATTR=y
1150CONFIG_EXT3_FS_POSIX_ACL=y 1817CONFIG_EXT3_FS_POSIX_ACL=y
1151# CONFIG_EXT3_FS_SECURITY is not set 1818CONFIG_EXT3_FS_SECURITY=y
1152# CONFIG_EXT4DEV_FS is not set 1819# CONFIG_EXT4DEV_FS is not set
1153CONFIG_JBD=y 1820CONFIG_JBD=y
1154# CONFIG_JBD_DEBUG is not set 1821# CONFIG_JBD_DEBUG is not set
1155CONFIG_FS_MBCACHE=y 1822CONFIG_FS_MBCACHE=y
1156CONFIG_REISERFS_FS=y 1823# CONFIG_REISERFS_FS is not set
1157# CONFIG_REISERFS_CHECK is not set
1158# CONFIG_REISERFS_PROC_INFO is not set
1159CONFIG_REISERFS_FS_XATTR=y
1160CONFIG_REISERFS_FS_POSIX_ACL=y
1161# CONFIG_REISERFS_FS_SECURITY is not set
1162# CONFIG_JFS_FS is not set 1824# CONFIG_JFS_FS is not set
1163CONFIG_FS_POSIX_ACL=y 1825CONFIG_FS_POSIX_ACL=y
1164# CONFIG_XFS_FS is not set 1826# CONFIG_XFS_FS is not set
1165# CONFIG_GFS2_FS is not set 1827# CONFIG_GFS2_FS is not set
1166# CONFIG_OCFS2_FS is not set 1828# CONFIG_OCFS2_FS is not set
1167# CONFIG_MINIX_FS is not set 1829CONFIG_DNOTIFY=y
1168# CONFIG_ROMFS_FS is not set
1169CONFIG_INOTIFY=y 1830CONFIG_INOTIFY=y
1170CONFIG_INOTIFY_USER=y 1831CONFIG_INOTIFY_USER=y
1171# CONFIG_QUOTA is not set 1832CONFIG_QUOTA=y
1172CONFIG_DNOTIFY=y 1833CONFIG_QUOTA_NETLINK_INTERFACE=y
1834# CONFIG_PRINT_QUOTA_WARNING is not set
1835# CONFIG_QFMT_V1 is not set
1836CONFIG_QFMT_V2=y
1837CONFIG_QUOTACTL=y
1173# CONFIG_AUTOFS_FS is not set 1838# CONFIG_AUTOFS_FS is not set
1174CONFIG_AUTOFS4_FS=y 1839CONFIG_AUTOFS4_FS=y
1175# CONFIG_FUSE_FS is not set 1840# CONFIG_FUSE_FS is not set
@@ -1180,7 +1845,7 @@ CONFIG_GENERIC_ACL=y
1180# 1845#
1181CONFIG_ISO9660_FS=y 1846CONFIG_ISO9660_FS=y
1182CONFIG_JOLIET=y 1847CONFIG_JOLIET=y
1183# CONFIG_ZISOFS is not set 1848CONFIG_ZISOFS=y
1184# CONFIG_UDF_FS is not set 1849# CONFIG_UDF_FS is not set
1185 1850
1186# 1851#
@@ -1198,13 +1863,13 @@ CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1"
1198# 1863#
1199CONFIG_PROC_FS=y 1864CONFIG_PROC_FS=y
1200CONFIG_PROC_KCORE=y 1865CONFIG_PROC_KCORE=y
1866CONFIG_PROC_VMCORE=y
1201CONFIG_PROC_SYSCTL=y 1867CONFIG_PROC_SYSCTL=y
1202CONFIG_SYSFS=y 1868CONFIG_SYSFS=y
1203CONFIG_TMPFS=y 1869CONFIG_TMPFS=y
1204CONFIG_TMPFS_POSIX_ACL=y 1870CONFIG_TMPFS_POSIX_ACL=y
1205CONFIG_HUGETLBFS=y 1871CONFIG_HUGETLBFS=y
1206CONFIG_HUGETLB_PAGE=y 1872CONFIG_HUGETLB_PAGE=y
1207CONFIG_RAMFS=y
1208# CONFIG_CONFIGFS_FS is not set 1873# CONFIG_CONFIGFS_FS is not set
1209 1874
1210# 1875#
@@ -1212,6 +1877,7 @@ CONFIG_RAMFS=y
1212# 1877#
1213# CONFIG_ADFS_FS is not set 1878# CONFIG_ADFS_FS is not set
1214# CONFIG_AFFS_FS is not set 1879# CONFIG_AFFS_FS is not set
1880# CONFIG_ECRYPT_FS is not set
1215# CONFIG_HFS_FS is not set 1881# CONFIG_HFS_FS is not set
1216# CONFIG_HFSPLUS_FS is not set 1882# CONFIG_HFSPLUS_FS is not set
1217# CONFIG_BEFS_FS is not set 1883# CONFIG_BEFS_FS is not set
@@ -1219,33 +1885,15 @@ CONFIG_RAMFS=y
1219# CONFIG_EFS_FS is not set 1885# CONFIG_EFS_FS is not set
1220# CONFIG_CRAMFS is not set 1886# CONFIG_CRAMFS is not set
1221# CONFIG_VXFS_FS is not set 1887# CONFIG_VXFS_FS is not set
1888# CONFIG_MINIX_FS is not set
1222# CONFIG_HPFS_FS is not set 1889# CONFIG_HPFS_FS is not set
1223# CONFIG_QNX4FS_FS is not set 1890# CONFIG_QNX4FS_FS is not set
1891# CONFIG_ROMFS_FS is not set
1224# CONFIG_SYSV_FS is not set 1892# CONFIG_SYSV_FS is not set
1225# CONFIG_UFS_FS is not set 1893# CONFIG_UFS_FS is not set
1226 1894CONFIG_NETWORK_FILESYSTEMS=y
1227# 1895# CONFIG_NFS_FS is not set
1228# Network File Systems 1896# CONFIG_NFSD is not set
1229#
1230CONFIG_NFS_FS=y
1231CONFIG_NFS_V3=y
1232# CONFIG_NFS_V3_ACL is not set
1233# CONFIG_NFS_V4 is not set
1234# CONFIG_NFS_DIRECTIO is not set
1235CONFIG_NFSD=y
1236CONFIG_NFSD_V3=y
1237# CONFIG_NFSD_V3_ACL is not set
1238# CONFIG_NFSD_V4 is not set
1239CONFIG_NFSD_TCP=y
1240CONFIG_ROOT_NFS=y
1241CONFIG_LOCKD=y
1242CONFIG_LOCKD_V4=y
1243CONFIG_EXPORTFS=y
1244CONFIG_NFS_COMMON=y
1245CONFIG_SUNRPC=y
1246# CONFIG_SUNRPC_BIND34 is not set
1247# CONFIG_RPCSEC_GSS_KRB5 is not set
1248# CONFIG_RPCSEC_GSS_SPKM3 is not set
1249# CONFIG_SMB_FS is not set 1897# CONFIG_SMB_FS is not set
1250# CONFIG_CIFS is not set 1898# CONFIG_CIFS is not set
1251# CONFIG_NCP_FS is not set 1899# CONFIG_NCP_FS is not set
@@ -1255,14 +1903,26 @@ CONFIG_SUNRPC=y
1255# 1903#
1256# Partition Types 1904# Partition Types
1257# 1905#
1258# CONFIG_PARTITION_ADVANCED is not set 1906CONFIG_PARTITION_ADVANCED=y
1907# CONFIG_ACORN_PARTITION is not set
1908CONFIG_OSF_PARTITION=y
1909CONFIG_AMIGA_PARTITION=y
1910# CONFIG_ATARI_PARTITION is not set
1911CONFIG_MAC_PARTITION=y
1259CONFIG_MSDOS_PARTITION=y 1912CONFIG_MSDOS_PARTITION=y
1260 1913CONFIG_BSD_DISKLABEL=y
1261# 1914CONFIG_MINIX_SUBPARTITION=y
1262# Native Language Support 1915CONFIG_SOLARIS_X86_PARTITION=y
1263# 1916CONFIG_UNIXWARE_DISKLABEL=y
1917# CONFIG_LDM_PARTITION is not set
1918CONFIG_SGI_PARTITION=y
1919# CONFIG_ULTRIX_PARTITION is not set
1920CONFIG_SUN_PARTITION=y
1921CONFIG_KARMA_PARTITION=y
1922CONFIG_EFI_PARTITION=y
1923# CONFIG_SYSV68_PARTITION is not set
1264CONFIG_NLS=y 1924CONFIG_NLS=y
1265CONFIG_NLS_DEFAULT="iso8859-1" 1925CONFIG_NLS_DEFAULT="utf8"
1266CONFIG_NLS_CODEPAGE_437=y 1926CONFIG_NLS_CODEPAGE_437=y
1267# CONFIG_NLS_CODEPAGE_737 is not set 1927# CONFIG_NLS_CODEPAGE_737 is not set
1268# CONFIG_NLS_CODEPAGE_775 is not set 1928# CONFIG_NLS_CODEPAGE_775 is not set
@@ -1297,40 +1957,33 @@ CONFIG_NLS_ISO8859_1=y
1297# CONFIG_NLS_ISO8859_9 is not set 1957# CONFIG_NLS_ISO8859_9 is not set
1298# CONFIG_NLS_ISO8859_13 is not set 1958# CONFIG_NLS_ISO8859_13 is not set
1299# CONFIG_NLS_ISO8859_14 is not set 1959# CONFIG_NLS_ISO8859_14 is not set
1300CONFIG_NLS_ISO8859_15=y 1960# CONFIG_NLS_ISO8859_15 is not set
1301# CONFIG_NLS_KOI8_R is not set 1961# CONFIG_NLS_KOI8_R is not set
1302# CONFIG_NLS_KOI8_U is not set 1962# CONFIG_NLS_KOI8_U is not set
1303CONFIG_NLS_UTF8=y 1963CONFIG_NLS_UTF8=y
1304
1305#
1306# Distributed Lock Manager
1307#
1308# CONFIG_DLM is not set 1964# CONFIG_DLM is not set
1309 1965
1310# 1966#
1311# Instrumentation Support
1312#
1313CONFIG_PROFILING=y
1314CONFIG_OPROFILE=y
1315CONFIG_KPROBES=y
1316
1317#
1318# Kernel hacking 1967# Kernel hacking
1319# 1968#
1320CONFIG_TRACE_IRQFLAGS_SUPPORT=y 1969CONFIG_TRACE_IRQFLAGS_SUPPORT=y
1321# CONFIG_PRINTK_TIME is not set 1970# CONFIG_PRINTK_TIME is not set
1971# CONFIG_ENABLE_WARN_DEPRECATED is not set
1322# CONFIG_ENABLE_MUST_CHECK is not set 1972# CONFIG_ENABLE_MUST_CHECK is not set
1973CONFIG_FRAME_WARN=2048
1323CONFIG_MAGIC_SYSRQ=y 1974CONFIG_MAGIC_SYSRQ=y
1324CONFIG_UNUSED_SYMBOLS=y 1975# CONFIG_UNUSED_SYMBOLS is not set
1325CONFIG_DEBUG_FS=y 1976CONFIG_DEBUG_FS=y
1326# CONFIG_HEADERS_CHECK is not set 1977# CONFIG_HEADERS_CHECK is not set
1327CONFIG_DEBUG_KERNEL=y 1978CONFIG_DEBUG_KERNEL=y
1328# CONFIG_DEBUG_SHIRQ is not set 1979# CONFIG_DEBUG_SHIRQ is not set
1329CONFIG_DETECT_SOFTLOCKUP=y 1980# CONFIG_DETECT_SOFTLOCKUP is not set
1330# CONFIG_SCHED_DEBUG is not set 1981# CONFIG_SCHED_DEBUG is not set
1331# CONFIG_SCHEDSTATS is not set 1982CONFIG_SCHEDSTATS=y
1332CONFIG_TIMER_STATS=y 1983CONFIG_TIMER_STATS=y
1333# CONFIG_DEBUG_SLAB is not set 1984# CONFIG_DEBUG_OBJECTS is not set
1985# CONFIG_SLUB_DEBUG_ON is not set
1986# CONFIG_SLUB_STATS is not set
1334# CONFIG_DEBUG_RT_MUTEXES is not set 1987# CONFIG_DEBUG_RT_MUTEXES is not set
1335# CONFIG_RT_MUTEX_TESTER is not set 1988# CONFIG_RT_MUTEX_TESTER is not set
1336# CONFIG_DEBUG_SPINLOCK is not set 1989# CONFIG_DEBUG_SPINLOCK is not set
@@ -1344,28 +1997,162 @@ CONFIG_TIMER_STATS=y
1344CONFIG_DEBUG_BUGVERBOSE=y 1997CONFIG_DEBUG_BUGVERBOSE=y
1345# CONFIG_DEBUG_INFO is not set 1998# CONFIG_DEBUG_INFO is not set
1346# CONFIG_DEBUG_VM is not set 1999# CONFIG_DEBUG_VM is not set
2000# CONFIG_DEBUG_WRITECOUNT is not set
1347# CONFIG_DEBUG_LIST is not set 2001# CONFIG_DEBUG_LIST is not set
1348# CONFIG_FRAME_POINTER is not set 2002# CONFIG_DEBUG_SG is not set
1349CONFIG_OPTIMIZE_INLINING=y 2003CONFIG_FRAME_POINTER=y
2004# CONFIG_BOOT_PRINTK_DELAY is not set
1350# CONFIG_RCU_TORTURE_TEST is not set 2005# CONFIG_RCU_TORTURE_TEST is not set
2006# CONFIG_KPROBES_SANITY_TEST is not set
2007# CONFIG_BACKTRACE_SELF_TEST is not set
1351# CONFIG_LKDTM is not set 2008# CONFIG_LKDTM is not set
1352# CONFIG_FAULT_INJECTION is not set 2009# CONFIG_FAULT_INJECTION is not set
1353# CONFIG_DEBUG_RODATA is not set 2010# CONFIG_LATENCYTOP is not set
1354# CONFIG_IOMMU_DEBUG is not set 2011CONFIG_PROVIDE_OHCI1394_DMA_INIT=y
2012# CONFIG_SAMPLES is not set
2013# CONFIG_KGDB is not set
2014CONFIG_HAVE_ARCH_KGDB=y
2015# CONFIG_NONPROMISC_DEVMEM is not set
2016CONFIG_EARLY_PRINTK=y
1355CONFIG_DEBUG_STACKOVERFLOW=y 2017CONFIG_DEBUG_STACKOVERFLOW=y
1356# CONFIG_DEBUG_STACK_USAGE is not set 2018CONFIG_DEBUG_STACK_USAGE=y
2019# CONFIG_DEBUG_PAGEALLOC is not set
2020# CONFIG_DEBUG_PER_CPU_MAPS is not set
2021# CONFIG_X86_PTDUMP is not set
2022CONFIG_DEBUG_RODATA=y
2023# CONFIG_DIRECT_GBPAGES is not set
2024# CONFIG_DEBUG_RODATA_TEST is not set
2025CONFIG_DEBUG_NX_TEST=m
2026CONFIG_X86_MPPARSE=y
2027# CONFIG_IOMMU_DEBUG is not set
2028CONFIG_IO_DELAY_TYPE_0X80=0
2029CONFIG_IO_DELAY_TYPE_0XED=1
2030CONFIG_IO_DELAY_TYPE_UDELAY=2
2031CONFIG_IO_DELAY_TYPE_NONE=3
2032CONFIG_IO_DELAY_0X80=y
2033# CONFIG_IO_DELAY_0XED is not set
2034# CONFIG_IO_DELAY_UDELAY is not set
2035# CONFIG_IO_DELAY_NONE is not set
2036CONFIG_DEFAULT_IO_DELAY_TYPE=0
2037CONFIG_DEBUG_BOOT_PARAMS=y
2038# CONFIG_CPA_DEBUG is not set
1357 2039
1358# 2040#
1359# Security options 2041# Security options
1360# 2042#
1361# CONFIG_KEYS is not set 2043CONFIG_KEYS=y
1362# CONFIG_SECURITY is not set 2044CONFIG_KEYS_DEBUG_PROC_KEYS=y
1363# CONFIG_CRYPTO is not set 2045CONFIG_SECURITY=y
2046CONFIG_SECURITY_NETWORK=y
2047# CONFIG_SECURITY_NETWORK_XFRM is not set
2048CONFIG_SECURITY_CAPABILITIES=y
2049CONFIG_SECURITY_FILE_CAPABILITIES=y
2050# CONFIG_SECURITY_ROOTPLUG is not set
2051CONFIG_SECURITY_DEFAULT_MMAP_MIN_ADDR=65536
2052CONFIG_SECURITY_SELINUX=y
2053CONFIG_SECURITY_SELINUX_BOOTPARAM=y
2054CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=1
2055CONFIG_SECURITY_SELINUX_DISABLE=y
2056CONFIG_SECURITY_SELINUX_DEVELOP=y
2057CONFIG_SECURITY_SELINUX_AVC_STATS=y
2058CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE=1
2059# CONFIG_SECURITY_SELINUX_ENABLE_SECMARK_DEFAULT is not set
2060# CONFIG_SECURITY_SELINUX_POLICYDB_VERSION_MAX is not set
2061# CONFIG_SECURITY_SMACK is not set
2062CONFIG_CRYPTO=y
2063
2064#
2065# Crypto core or helper
2066#
2067CONFIG_CRYPTO_ALGAPI=y
2068CONFIG_CRYPTO_AEAD=y
2069CONFIG_CRYPTO_BLKCIPHER=y
2070CONFIG_CRYPTO_HASH=y
2071CONFIG_CRYPTO_MANAGER=y
2072# CONFIG_CRYPTO_GF128MUL is not set
2073# CONFIG_CRYPTO_NULL is not set
2074# CONFIG_CRYPTO_CRYPTD is not set
2075CONFIG_CRYPTO_AUTHENC=y
2076# CONFIG_CRYPTO_TEST is not set
2077
2078#
2079# Authenticated Encryption with Associated Data
2080#
2081# CONFIG_CRYPTO_CCM is not set
2082# CONFIG_CRYPTO_GCM is not set
2083# CONFIG_CRYPTO_SEQIV is not set
2084
2085#
2086# Block modes
2087#
2088CONFIG_CRYPTO_CBC=y
2089# CONFIG_CRYPTO_CTR is not set
2090# CONFIG_CRYPTO_CTS is not set
2091CONFIG_CRYPTO_ECB=y
2092# CONFIG_CRYPTO_LRW is not set
2093# CONFIG_CRYPTO_PCBC is not set
2094# CONFIG_CRYPTO_XTS is not set
2095
2096#
2097# Hash modes
2098#
2099CONFIG_CRYPTO_HMAC=y
2100# CONFIG_CRYPTO_XCBC is not set
2101
2102#
2103# Digest
2104#
2105# CONFIG_CRYPTO_CRC32C is not set
2106# CONFIG_CRYPTO_MD4 is not set
2107CONFIG_CRYPTO_MD5=y
2108# CONFIG_CRYPTO_MICHAEL_MIC is not set
2109CONFIG_CRYPTO_SHA1=y
2110# CONFIG_CRYPTO_SHA256 is not set
2111# CONFIG_CRYPTO_SHA512 is not set
2112# CONFIG_CRYPTO_TGR192 is not set
2113# CONFIG_CRYPTO_WP512 is not set
2114
2115#
2116# Ciphers
2117#
2118CONFIG_CRYPTO_AES=y
2119# CONFIG_CRYPTO_AES_X86_64 is not set
2120# CONFIG_CRYPTO_ANUBIS is not set
2121CONFIG_CRYPTO_ARC4=y
2122# CONFIG_CRYPTO_BLOWFISH is not set
2123# CONFIG_CRYPTO_CAMELLIA is not set
2124# CONFIG_CRYPTO_CAST5 is not set
2125# CONFIG_CRYPTO_CAST6 is not set
2126CONFIG_CRYPTO_DES=y
2127# CONFIG_CRYPTO_FCRYPT is not set
2128# CONFIG_CRYPTO_KHAZAD is not set
2129# CONFIG_CRYPTO_SALSA20 is not set
2130# CONFIG_CRYPTO_SALSA20_X86_64 is not set
2131# CONFIG_CRYPTO_SEED is not set
2132# CONFIG_CRYPTO_SERPENT is not set
2133# CONFIG_CRYPTO_TEA is not set
2134# CONFIG_CRYPTO_TWOFISH is not set
2135# CONFIG_CRYPTO_TWOFISH_X86_64 is not set
2136
2137#
2138# Compression
2139#
2140# CONFIG_CRYPTO_DEFLATE is not set
2141# CONFIG_CRYPTO_LZO is not set
2142CONFIG_CRYPTO_HW=y
2143# CONFIG_CRYPTO_DEV_HIFN_795X is not set
2144CONFIG_HAVE_KVM=y
2145CONFIG_VIRTUALIZATION=y
2146# CONFIG_KVM is not set
2147# CONFIG_VIRTIO_PCI is not set
2148# CONFIG_VIRTIO_BALLOON is not set
1364 2149
1365# 2150#
1366# Library routines 2151# Library routines
1367# 2152#
1368CONFIG_BITREVERSE=y 2153CONFIG_BITREVERSE=y
2154CONFIG_GENERIC_FIND_FIRST_BIT=y
2155CONFIG_GENERIC_FIND_NEXT_BIT=y
1369# CONFIG_CRC_CCITT is not set 2156# CONFIG_CRC_CCITT is not set
1370# CONFIG_CRC16 is not set 2157# CONFIG_CRC16 is not set
1371# CONFIG_CRC_ITU_T is not set 2158# CONFIG_CRC_ITU_T is not set
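
A note on how the symbol flips in the defconfig hunks above reach C code: kconfig writes each .config entry into the generated include/linux/autoconf.h, which kbuild force-includes (via -include) into every compilation unit. "=y" becomes a plain define, "=m" becomes a _MODULE define, and an "is not set" comment yields no define at all. A minimal sketch using symbols from the hunks above (illustrative only; it compiles inside a configured kernel tree, where autoconf.h is injected by kbuild rather than included by hand):

	#include <linux/autoconf.h>		/* kbuild normally passes -include for this */

	#ifdef CONFIG_SND			/* CONFIG_SND=y above: ALSA core built in */
	/* built-in code paths */
	#endif

	#ifdef CONFIG_DEBUG_NX_TEST_MODULE	/* CONFIG_DEBUG_NX_TEST=m above */
	/* modular build of the NX test case */
	#endif

	#ifndef CONFIG_KVM			/* "# CONFIG_KVM is not set": no define emitted */
	/* KVM disabled in this config */
	#endif
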
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index b5e329da166c..20371d0635e4 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -61,6 +61,19 @@
61 CFI_UNDEFINED r15 61 CFI_UNDEFINED r15
62 .endm 62 .endm
63 63
64#ifdef CONFIG_PARAVIRT
65ENTRY(native_usergs_sysret32)
66 swapgs
67 sysretl
68ENDPROC(native_usergs_sysret32)
69
70ENTRY(native_irq_enable_sysexit)
71 swapgs
72 sti
73 sysexit
74ENDPROC(native_irq_enable_sysexit)
75#endif
76
64/* 77/*
65 * 32bit SYSENTER instruction entry. 78 * 32bit SYSENTER instruction entry.
66 * 79 *
@@ -85,14 +98,14 @@ ENTRY(ia32_sysenter_target)
85 CFI_SIGNAL_FRAME 98 CFI_SIGNAL_FRAME
86 CFI_DEF_CFA rsp,0 99 CFI_DEF_CFA rsp,0
87 CFI_REGISTER rsp,rbp 100 CFI_REGISTER rsp,rbp
88 swapgs 101 SWAPGS_UNSAFE_STACK
89 movq %gs:pda_kernelstack, %rsp 102 movq %gs:pda_kernelstack, %rsp
90 addq $(PDA_STACKOFFSET),%rsp 103 addq $(PDA_STACKOFFSET),%rsp
91 /* 104 /*
92 * No need to follow this irqs on/off section: the syscall 105 * No need to follow this irqs on/off section: the syscall
93 * disabled irqs, here we enable it straight after entry: 106 * disabled irqs, here we enable it straight after entry:
94 */ 107 */
95 sti 108 ENABLE_INTERRUPTS(CLBR_NONE)
96 movl %ebp,%ebp /* zero extension */ 109 movl %ebp,%ebp /* zero extension */
97 pushq $__USER32_DS 110 pushq $__USER32_DS
98 CFI_ADJUST_CFA_OFFSET 8 111 CFI_ADJUST_CFA_OFFSET 8
@@ -103,7 +116,7 @@ ENTRY(ia32_sysenter_target)
103 pushfq 116 pushfq
104 CFI_ADJUST_CFA_OFFSET 8 117 CFI_ADJUST_CFA_OFFSET 8
105 /*CFI_REL_OFFSET rflags,0*/ 118 /*CFI_REL_OFFSET rflags,0*/
106 movl 8*3-THREAD_SIZE+threadinfo_sysenter_return(%rsp), %r10d 119 movl 8*3-THREAD_SIZE+TI_sysenter_return(%rsp), %r10d
107 CFI_REGISTER rip,r10 120 CFI_REGISTER rip,r10
108 pushq $__USER32_CS 121 pushq $__USER32_CS
109 CFI_ADJUST_CFA_OFFSET 8 122 CFI_ADJUST_CFA_OFFSET 8
@@ -123,8 +136,9 @@ ENTRY(ia32_sysenter_target)
123 .quad 1b,ia32_badarg 136 .quad 1b,ia32_badarg
124 .previous 137 .previous
125 GET_THREAD_INFO(%r10) 138 GET_THREAD_INFO(%r10)
126 orl $TS_COMPAT,threadinfo_status(%r10) 139 orl $TS_COMPAT,TI_status(%r10)
127 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%r10) 140 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP), \
141 TI_flags(%r10)
128 CFI_REMEMBER_STATE 142 CFI_REMEMBER_STATE
129 jnz sysenter_tracesys 143 jnz sysenter_tracesys
130sysenter_do_call: 144sysenter_do_call:
@@ -134,11 +148,11 @@ sysenter_do_call:
134 call *ia32_sys_call_table(,%rax,8) 148 call *ia32_sys_call_table(,%rax,8)
135 movq %rax,RAX-ARGOFFSET(%rsp) 149 movq %rax,RAX-ARGOFFSET(%rsp)
136 GET_THREAD_INFO(%r10) 150 GET_THREAD_INFO(%r10)
137 cli 151 DISABLE_INTERRUPTS(CLBR_NONE)
138 TRACE_IRQS_OFF 152 TRACE_IRQS_OFF
139 testl $_TIF_ALLWORK_MASK,threadinfo_flags(%r10) 153 testl $_TIF_ALLWORK_MASK,TI_flags(%r10)
140 jnz int_ret_from_sys_call 154 jnz int_ret_from_sys_call
141 andl $~TS_COMPAT,threadinfo_status(%r10) 155 andl $~TS_COMPAT,TI_status(%r10)
142 /* clear IF, that popfq doesn't enable interrupts early */ 156 /* clear IF, that popfq doesn't enable interrupts early */
143 andl $~0x200,EFLAGS-R11(%rsp) 157 andl $~0x200,EFLAGS-R11(%rsp)
144 movl RIP-R11(%rsp),%edx /* User %eip */ 158 movl RIP-R11(%rsp),%edx /* User %eip */
@@ -151,10 +165,7 @@ sysenter_do_call:
151 CFI_ADJUST_CFA_OFFSET -8 165 CFI_ADJUST_CFA_OFFSET -8
152 CFI_REGISTER rsp,rcx 166 CFI_REGISTER rsp,rcx
153 TRACE_IRQS_ON 167 TRACE_IRQS_ON
154 swapgs 168 ENABLE_INTERRUPTS_SYSEXIT32
155 sti /* sti only takes effect after the next instruction */
156 /* sysexit */
157 .byte 0xf, 0x35
158 169
159sysenter_tracesys: 170sysenter_tracesys:
160 CFI_RESTORE_STATE 171 CFI_RESTORE_STATE
@@ -200,7 +211,7 @@ ENTRY(ia32_cstar_target)
200 CFI_DEF_CFA rsp,PDA_STACKOFFSET 211 CFI_DEF_CFA rsp,PDA_STACKOFFSET
201 CFI_REGISTER rip,rcx 212 CFI_REGISTER rip,rcx
202 /*CFI_REGISTER rflags,r11*/ 213 /*CFI_REGISTER rflags,r11*/
203 swapgs 214 SWAPGS_UNSAFE_STACK
204 movl %esp,%r8d 215 movl %esp,%r8d
205 CFI_REGISTER rsp,r8 216 CFI_REGISTER rsp,r8
206 movq %gs:pda_kernelstack,%rsp 217 movq %gs:pda_kernelstack,%rsp
@@ -208,7 +219,7 @@ ENTRY(ia32_cstar_target)
208 * No need to follow this irqs on/off section: the syscall 219 * No need to follow this irqs on/off section: the syscall
209 * disabled irqs and here we enable it straight after entry: 220 * disabled irqs and here we enable it straight after entry:
210 */ 221 */
211 sti 222 ENABLE_INTERRUPTS(CLBR_NONE)
212 SAVE_ARGS 8,1,1 223 SAVE_ARGS 8,1,1
213 movl %eax,%eax /* zero extension */ 224 movl %eax,%eax /* zero extension */
214 movq %rax,ORIG_RAX-ARGOFFSET(%rsp) 225 movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
@@ -230,8 +241,9 @@ ENTRY(ia32_cstar_target)
230 .quad 1b,ia32_badarg 241 .quad 1b,ia32_badarg
231 .previous 242 .previous
232 GET_THREAD_INFO(%r10) 243 GET_THREAD_INFO(%r10)
233 orl $TS_COMPAT,threadinfo_status(%r10) 244 orl $TS_COMPAT,TI_status(%r10)
234 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%r10) 245 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP), \
246 TI_flags(%r10)
235 CFI_REMEMBER_STATE 247 CFI_REMEMBER_STATE
236 jnz cstar_tracesys 248 jnz cstar_tracesys
237cstar_do_call: 249cstar_do_call:
@@ -241,11 +253,11 @@ cstar_do_call:
241 call *ia32_sys_call_table(,%rax,8) 253 call *ia32_sys_call_table(,%rax,8)
242 movq %rax,RAX-ARGOFFSET(%rsp) 254 movq %rax,RAX-ARGOFFSET(%rsp)
243 GET_THREAD_INFO(%r10) 255 GET_THREAD_INFO(%r10)
244 cli 256 DISABLE_INTERRUPTS(CLBR_NONE)
245 TRACE_IRQS_OFF 257 TRACE_IRQS_OFF
246 testl $_TIF_ALLWORK_MASK,threadinfo_flags(%r10) 258 testl $_TIF_ALLWORK_MASK,TI_flags(%r10)
247 jnz int_ret_from_sys_call 259 jnz int_ret_from_sys_call
248 andl $~TS_COMPAT,threadinfo_status(%r10) 260 andl $~TS_COMPAT,TI_status(%r10)
249 RESTORE_ARGS 1,-ARG_SKIP,1,1,1 261 RESTORE_ARGS 1,-ARG_SKIP,1,1,1
250 movl RIP-ARGOFFSET(%rsp),%ecx 262 movl RIP-ARGOFFSET(%rsp),%ecx
251 CFI_REGISTER rip,rcx 263 CFI_REGISTER rip,rcx
@@ -254,8 +266,7 @@ cstar_do_call:
254 TRACE_IRQS_ON 266 TRACE_IRQS_ON
255 movl RSP-ARGOFFSET(%rsp),%esp 267 movl RSP-ARGOFFSET(%rsp),%esp
256 CFI_RESTORE rsp 268 CFI_RESTORE rsp
257 swapgs 269 USERGS_SYSRET32
258 sysretl
259 270
260cstar_tracesys: 271cstar_tracesys:
261 CFI_RESTORE_STATE 272 CFI_RESTORE_STATE
@@ -310,12 +321,12 @@ ENTRY(ia32_syscall)
310 /*CFI_REL_OFFSET rflags,EFLAGS-RIP*/ 321 /*CFI_REL_OFFSET rflags,EFLAGS-RIP*/
311 /*CFI_REL_OFFSET cs,CS-RIP*/ 322 /*CFI_REL_OFFSET cs,CS-RIP*/
312 CFI_REL_OFFSET rip,RIP-RIP 323 CFI_REL_OFFSET rip,RIP-RIP
313 swapgs 324 SWAPGS
314 /* 325 /*
315 * No need to follow this irqs on/off section: the syscall 326 * No need to follow this irqs on/off section: the syscall
316 * disabled irqs and here we enable it straight after entry: 327 * disabled irqs and here we enable it straight after entry:
317 */ 328 */
318 sti 329 ENABLE_INTERRUPTS(CLBR_NONE)
319 movl %eax,%eax 330 movl %eax,%eax
320 pushq %rax 331 pushq %rax
321 CFI_ADJUST_CFA_OFFSET 8 332 CFI_ADJUST_CFA_OFFSET 8
@@ -324,8 +335,9 @@ ENTRY(ia32_syscall)
324 this could be a problem. */ 335 this could be a problem. */
325 SAVE_ARGS 0,0,1 336 SAVE_ARGS 0,0,1
326 GET_THREAD_INFO(%r10) 337 GET_THREAD_INFO(%r10)
327 orl $TS_COMPAT,threadinfo_status(%r10) 338 orl $TS_COMPAT,TI_status(%r10)
328 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%r10) 339 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP), \
340 TI_flags(%r10)
329 jnz ia32_tracesys 341 jnz ia32_tracesys
330ia32_do_syscall: 342ia32_do_syscall:
331 cmpl $(IA32_NR_syscalls-1),%eax 343 cmpl $(IA32_NR_syscalls-1),%eax
@@ -370,13 +382,11 @@ quiet_ni_syscall:
370 PTREGSCALL stub32_rt_sigreturn, sys32_rt_sigreturn, %rdi 382 PTREGSCALL stub32_rt_sigreturn, sys32_rt_sigreturn, %rdi
371 PTREGSCALL stub32_sigreturn, sys32_sigreturn, %rdi 383 PTREGSCALL stub32_sigreturn, sys32_sigreturn, %rdi
372 PTREGSCALL stub32_sigaltstack, sys32_sigaltstack, %rdx 384 PTREGSCALL stub32_sigaltstack, sys32_sigaltstack, %rdx
373 PTREGSCALL stub32_sigsuspend, sys32_sigsuspend, %rcx
374 PTREGSCALL stub32_execve, sys32_execve, %rcx 385 PTREGSCALL stub32_execve, sys32_execve, %rcx
375 PTREGSCALL stub32_fork, sys_fork, %rdi 386 PTREGSCALL stub32_fork, sys_fork, %rdi
376 PTREGSCALL stub32_clone, sys32_clone, %rdx 387 PTREGSCALL stub32_clone, sys32_clone, %rdx
377 PTREGSCALL stub32_vfork, sys_vfork, %rdi 388 PTREGSCALL stub32_vfork, sys_vfork, %rdi
378 PTREGSCALL stub32_iopl, sys_iopl, %rsi 389 PTREGSCALL stub32_iopl, sys_iopl, %rsi
379 PTREGSCALL stub32_rt_sigsuspend, sys_rt_sigsuspend, %rdx
380 390
381ENTRY(ia32_ptregs_common) 391ENTRY(ia32_ptregs_common)
382 popq %r11 392 popq %r11
@@ -476,7 +486,7 @@ ia32_sys_call_table:
476 .quad sys_ssetmask 486 .quad sys_ssetmask
477 .quad sys_setreuid16 /* 70 */ 487 .quad sys_setreuid16 /* 70 */
478 .quad sys_setregid16 488 .quad sys_setregid16
479 .quad stub32_sigsuspend 489 .quad sys32_sigsuspend
480 .quad compat_sys_sigpending 490 .quad compat_sys_sigpending
481 .quad sys_sethostname 491 .quad sys_sethostname
482 .quad compat_sys_setrlimit /* 75 */ 492 .quad compat_sys_setrlimit /* 75 */
@@ -583,7 +593,7 @@ ia32_sys_call_table:
583 .quad sys32_rt_sigpending 593 .quad sys32_rt_sigpending
584 .quad compat_sys_rt_sigtimedwait 594 .quad compat_sys_rt_sigtimedwait
585 .quad sys32_rt_sigqueueinfo 595 .quad sys32_rt_sigqueueinfo
586 .quad stub32_rt_sigsuspend 596 .quad sys_rt_sigsuspend
587 .quad sys32_pread /* 180 */ 597 .quad sys32_pread /* 180 */
588 .quad sys32_pwrite 598 .quad sys32_pwrite
589 .quad sys_chown16 599 .quad sys_chown16
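
The ia32entry.S hunks above are the compat-entry side of the paravirt conversion: raw swapgs/sti/cli and the hand-rolled sysexit/sysretl tails become SWAPGS_UNSAFE_STACK, ENABLE_INTERRUPTS, DISABLE_INTERRUPTS, ENABLE_INTERRUPTS_SYSEXIT32 and USERGS_SYSRET32, with native_usergs_sysret32/native_irq_enable_sysexit added as the native targets, and the threadinfo_* asm-offsets renamed to TI_*. On a !CONFIG_PARAVIRT build the macros collapse back to the original instructions; with paravirt enabled they become patchable call sites. Roughly, simplified from this era's <asm/irqflags.h> (a sketch, not the exact definitions):

	#ifdef CONFIG_PARAVIRT
	#include <asm/paravirt.h>	/* patchable indirection via pv_irq_ops/pv_cpu_ops */
	#else
	#define ENABLE_INTERRUPTS(x)	sti
	#define DISABLE_INTERRUPTS(x)	cli
	#define SWAPGS_UNSAFE_STACK	swapgs
	#define USERGS_SYSRET32			\
		swapgs;				\
		sysretl
	#endif
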
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 77807d4769c9..55ff016e9f69 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -2,7 +2,7 @@
 # Makefile for the linux kernel.
 #
 
-extra-y := head_$(BITS).o head$(BITS).o init_task.o vmlinux.lds
+extra-y := head_$(BITS).o head$(BITS).o head.o init_task.o vmlinux.lds
 
 CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE)
 
@@ -13,20 +13,21 @@ CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE)
 nostackp := $(call cc-option, -fno-stack-protector)
 CFLAGS_vsyscall_64.o := $(PROFILING) -g0 $(nostackp)
 CFLAGS_hpet.o := $(nostackp)
-CFLAGS_tsc_64.o := $(nostackp)
+CFLAGS_tsc.o := $(nostackp)
 
 obj-y := process_$(BITS).o signal_$(BITS).o entry_$(BITS).o
 obj-y += traps_$(BITS).o irq_$(BITS).o
 obj-y += time_$(BITS).o ioport.o ldt.o
-obj-y += setup_$(BITS).o i8259_$(BITS).o setup.o
+obj-y += setup.o i8259.o irqinit_$(BITS).o setup_percpu.o
+obj-$(CONFIG_X86_VISWS) += visws_quirks.o
+obj-$(CONFIG_X86_32) += probe_roms_32.o
 obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o
 obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o
-obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o setup64.o
+obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o
-obj-y += bootflag.o e820_$(BITS).o
+obj-y += bootflag.o e820.o
 obj-y += pci-dma.o quirks.o i8237.o topology.o kdebugfs.o
 obj-y += alternative.o i8253.o pci-nommu.o
-obj-$(CONFIG_X86_64) += bugs_64.o
-obj-y += tsc_$(BITS).o io_delay.o rtc.o
+obj-y += tsc.o io_delay.o rtc.o
 
 obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o
 obj-y += process.o
@@ -53,7 +54,7 @@ obj-$(CONFIG_X86_32_SMP) += smpcommon.o
 obj-$(CONFIG_X86_64_SMP) += tsc_sync.o smpcommon.o
 obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_$(BITS).o
 obj-$(CONFIG_X86_MPPARSE) += mpparse.o
-obj-$(CONFIG_X86_LOCAL_APIC) += apic_$(BITS).o nmi_$(BITS).o
+obj-$(CONFIG_X86_LOCAL_APIC) += apic_$(BITS).o nmi.o
 obj-$(CONFIG_X86_IO_APIC) += io_apic_$(BITS).o
 obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o
 obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o
@@ -64,7 +65,6 @@ obj-$(CONFIG_X86_SUMMIT_NUMA) += summit_32.o
 obj-y += vsmp_64.o
 obj-$(CONFIG_KPROBES) += kprobes.o
 obj-$(CONFIG_MODULES) += module_$(BITS).o
-obj-$(CONFIG_ACPI_SRAT) += srat_32.o
 obj-$(CONFIG_EFI) += efi.o efi_$(BITS).o efi_stub_$(BITS).o
 obj-$(CONFIG_DOUBLEFAULT) += doublefault_32.o
 obj-$(CONFIG_KGDB) += kgdb.o
@@ -94,12 +94,13 @@ obj-$(CONFIG_OLPC) += olpc.o
 ###
 # 64 bit specific files
 ifeq ($(CONFIG_X86_64),y)
-        obj-y += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o
+        obj-y += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o
         obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o
         obj-$(CONFIG_AUDIT) += audit_64.o
 
         obj-$(CONFIG_GART_IOMMU) += pci-gart_64.o aperture_64.o
         obj-$(CONFIG_CALGARY_IOMMU) += pci-calgary_64.o tce_64.o
+        obj-$(CONFIG_AMD_IOMMU) += amd_iommu_init.o amd_iommu.o
         obj-$(CONFIG_SWIOTLB) += pci-swiotlb_64.o
 
         obj-$(CONFIG_PCI_MMCONFIG) += mmconf-fam10h_64.o
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 33c5216fd3e1..785700a08e9d 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -37,6 +37,7 @@
 #include <asm/pgtable.h>
 #include <asm/io_apic.h>
 #include <asm/apic.h>
+#include <asm/genapic.h>
 #include <asm/io.h>
 #include <asm/mpspec.h>
 #include <asm/smp.h>
@@ -106,21 +107,6 @@ static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
  */
 enum acpi_irq_model_id acpi_irq_model = ACPI_IRQ_MODEL_PIC;
 
-#ifdef CONFIG_X86_64
-
-/* rely on all ACPI tables being in the direct mapping */
-char *__init __acpi_map_table(unsigned long phys_addr, unsigned long size)
-{
-	if (!phys_addr || !size)
-		return NULL;
-
-	if (phys_addr+size <= (max_pfn_mapped << PAGE_SHIFT) + PAGE_SIZE)
-		return __va(phys_addr);
-
-	return NULL;
-}
-
-#else
 
 /*
  * Temporarily use the virtual area starting from FIX_IO_APIC_BASE_END,
@@ -139,11 +125,15 @@ char *__init __acpi_map_table(unsigned long phys, unsigned long size)
 	unsigned long base, offset, mapped_size;
 	int idx;
 
-	if (phys + size < 8 * 1024 * 1024)
+	if (!phys || !size)
+		return NULL;
+
+	if (phys+size <= (max_low_pfn_mapped << PAGE_SHIFT))
 		return __va(phys);
 
 	offset = phys & (PAGE_SIZE - 1);
 	mapped_size = PAGE_SIZE - offset;
+	clear_fixmap(FIX_ACPI_END);
 	set_fixmap(FIX_ACPI_END, phys);
 	base = fix_to_virt(FIX_ACPI_END);
 
@@ -155,13 +145,13 @@ char *__init __acpi_map_table(unsigned long phys, unsigned long size)
 		if (--idx < FIX_ACPI_BEGIN)
 			return NULL;	/* cannot handle this */
 		phys += PAGE_SIZE;
+		clear_fixmap(idx);
 		set_fixmap(idx, phys);
 		mapped_size += PAGE_SIZE;
 	}
 
 	return ((unsigned char *)base + offset);
 }
-#endif
 
 #ifdef CONFIG_PCI_MMCONFIG
 /* The physical address of the MMCONFIG aperture. Set from ACPI tables. */
@@ -338,8 +328,6 @@ acpi_parse_lapic_nmi(struct acpi_subtable_header * header, const unsigned long e
 
 #ifdef CONFIG_X86_IO_APIC
 
-struct mp_ioapic_routing mp_ioapic_routing[MAX_IO_APICS];
-
 static int __init
 acpi_parse_ioapic(struct acpi_subtable_header * header, const unsigned long end)
 {
@@ -514,8 +502,6 @@ int acpi_register_gsi(u32 gsi, int triggering, int polarity)
 	 * Make sure all (legacy) PCI IRQs are set as level-triggered.
 	 */
 	if (acpi_irq_model == ACPI_IRQ_MODEL_PIC) {
-		extern void eisa_set_level_irq(unsigned int irq);
-
 		if (triggering == ACPI_LEVEL_SENSITIVE)
 			eisa_set_level_irq(gsi);
 	}
@@ -860,6 +846,364 @@ static int __init acpi_parse_madt_lapic_entries(void)
 #endif	/* CONFIG_X86_LOCAL_APIC */
 
 #ifdef CONFIG_X86_IO_APIC
+#define MP_ISA_BUS		0
+
+#ifdef CONFIG_X86_ES7000
+extern int es7000_plat;
+#endif
+
+static struct {
+	int apic_id;
+	int gsi_base;
+	int gsi_end;
+	DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1);
+} mp_ioapic_routing[MAX_IO_APICS];
+
+static int mp_find_ioapic(int gsi)
+{
+	int i = 0;
+
+	/* Find the IOAPIC that manages this GSI. */
+	for (i = 0; i < nr_ioapics; i++) {
+		if ((gsi >= mp_ioapic_routing[i].gsi_base)
+		    && (gsi <= mp_ioapic_routing[i].gsi_end))
+			return i;
+	}
+
+	printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi);
+	return -1;
+}
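The inverse of this lookup, used repeatedly in the functions that follow, is just an offset from the routing table's gsi_base. A minimal sketch of that conversion (a hypothetical helper, not part of the patch):

	static int gsi_to_pin(int ioapic, u32 gsi)
	{
		/* valid only after mp_find_ioapic(gsi) returned this index */
		return gsi - mp_ioapic_routing[ioapic].gsi_base;
	}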
+
+static u8 __init uniq_ioapic_id(u8 id)
+{
+#ifdef CONFIG_X86_32
+	if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
+	    !APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
+		return io_apic_get_unique_id(nr_ioapics, id);
+	else
+		return id;
+#else
+	int i;
+	DECLARE_BITMAP(used, 256);
+	bitmap_zero(used, 256);
+	for (i = 0; i < nr_ioapics; i++) {
+		struct mp_config_ioapic *ia = &mp_ioapics[i];
+		__set_bit(ia->mp_apicid, used);
+	}
+	if (!test_bit(id, used))
+		return id;
+	return find_first_zero_bit(used, 256);
+#endif
+}
+
+static int bad_ioapic(unsigned long address)
+{
+	if (nr_ioapics >= MAX_IO_APICS) {
+		printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded "
+		       "(found %d)\n", MAX_IO_APICS, nr_ioapics);
+		panic("Recompile kernel with bigger MAX_IO_APICS!\n");
+	}
+	if (!address) {
+		printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address"
+		       " found in table, skipping!\n");
+		return 1;
+	}
+	return 0;
+}
+
+void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
+{
+	int idx = 0;
+
+	if (bad_ioapic(address))
+		return;
+
+	idx = nr_ioapics;
+
+	mp_ioapics[idx].mp_type = MP_IOAPIC;
+	mp_ioapics[idx].mp_flags = MPC_APIC_USABLE;
+	mp_ioapics[idx].mp_apicaddr = address;
+
+	set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
+	mp_ioapics[idx].mp_apicid = uniq_ioapic_id(id);
+#ifdef CONFIG_X86_32
+	mp_ioapics[idx].mp_apicver = io_apic_get_version(idx);
+#else
+	mp_ioapics[idx].mp_apicver = 0;
+#endif
+	/*
+	 * Build basic GSI lookup table to facilitate gsi->io_apic lookups
+	 * and to prevent reprogramming of IOAPIC pins (PCI GSIs).
+	 */
+	mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mp_apicid;
+	mp_ioapic_routing[idx].gsi_base = gsi_base;
+	mp_ioapic_routing[idx].gsi_end = gsi_base +
+	    io_apic_get_redir_entries(idx);
+
+	printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%lx, "
+	       "GSI %d-%d\n", idx, mp_ioapics[idx].mp_apicid,
+	       mp_ioapics[idx].mp_apicver, mp_ioapics[idx].mp_apicaddr,
+	       mp_ioapic_routing[idx].gsi_base, mp_ioapic_routing[idx].gsi_end);
+
+	nr_ioapics++;
+}
+
+static void assign_to_mp_irq(struct mp_config_intsrc *m,
+			     struct mp_config_intsrc *mp_irq)
+{
+	memcpy(mp_irq, m, sizeof(struct mp_config_intsrc));
+}
+
+static int mp_irq_cmp(struct mp_config_intsrc *mp_irq,
+		      struct mp_config_intsrc *m)
+{
+	return memcmp(mp_irq, m, sizeof(struct mp_config_intsrc));
+}
+
+static void save_mp_irq(struct mp_config_intsrc *m)
+{
+	int i;
+
+	for (i = 0; i < mp_irq_entries; i++) {
+		if (!mp_irq_cmp(&mp_irqs[i], m))
+			return;
+	}
+
+	assign_to_mp_irq(m, &mp_irqs[mp_irq_entries]);
+	if (++mp_irq_entries == MAX_IRQ_SOURCES)
+		panic("Max # of irq sources exceeded!!\n");
+}
+
+void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
+{
+	int ioapic;
+	int pin;
+	struct mp_config_intsrc mp_irq;
+
+	/*
+	 * Convert 'gsi' to 'ioapic.pin'.
+	 */
+	ioapic = mp_find_ioapic(gsi);
+	if (ioapic < 0)
+		return;
+	pin = gsi - mp_ioapic_routing[ioapic].gsi_base;
+
+	/*
+	 * TBD: This check is for faulty timer entries, where the override
+	 *      erroneously sets the trigger to level, resulting in a HUGE
+	 *      increase of timer interrupts!
+	 */
+	if ((bus_irq == 0) && (trigger == 3))
+		trigger = 1;
+
+	mp_irq.mp_type = MP_INTSRC;
+	mp_irq.mp_irqtype = mp_INT;
+	mp_irq.mp_irqflag = (trigger << 2) | polarity;
+	mp_irq.mp_srcbus = MP_ISA_BUS;
+	mp_irq.mp_srcbusirq = bus_irq;	/* IRQ */
+	mp_irq.mp_dstapic = mp_ioapics[ioapic].mp_apicid; /* APIC ID */
+	mp_irq.mp_dstirq = pin;	/* INTIN# */
+
+	save_mp_irq(&mp_irq);
+}
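The mp_irqflag word built above packs the MP-specification trigger and polarity fields: bits 1:0 hold polarity and bits 3:2 hold the trigger mode, with 0 = conforming, 1 = edge/active-high and 3 = level/active-low in each field. Two worked encodings:

	u16 isa_flag = (1 << 2) | 1;	/* edge, active-high: 0x05 */
	u16 pci_flag = (3 << 2) | 3;	/* level, active-low:  0x0f */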
+
+void __init mp_config_acpi_legacy_irqs(void)
+{
+	int i;
+	int ioapic;
+	unsigned int dstapic;
+	struct mp_config_intsrc mp_irq;
+
+#if defined (CONFIG_MCA) || defined (CONFIG_EISA)
+	/*
+	 * Fabricate the legacy ISA bus (bus #31).
+	 */
+	mp_bus_id_to_type[MP_ISA_BUS] = MP_BUS_ISA;
+#endif
+	set_bit(MP_ISA_BUS, mp_bus_not_pci);
+	Dprintk("Bus #%d is ISA\n", MP_ISA_BUS);
+
+#ifdef CONFIG_X86_ES7000
+	/*
+	 * Older generations of ES7000 have no legacy identity mappings
+	 */
+	if (es7000_plat == 1)
+		return;
+#endif
+
+	/*
+	 * Locate the IOAPIC that manages the ISA IRQs (0-15).
+	 */
+	ioapic = mp_find_ioapic(0);
+	if (ioapic < 0)
+		return;
+	dstapic = mp_ioapics[ioapic].mp_apicid;
+
+	/*
+	 * Use the default configuration for IRQs 0-15, unless
+	 * overridden by (MADT) interrupt source override entries.
+	 */
+	for (i = 0; i < 16; i++) {
+		int idx;
+
+		for (idx = 0; idx < mp_irq_entries; idx++) {
+			struct mp_config_intsrc *irq = mp_irqs + idx;
+
+			/* Do we already have a mapping for this ISA IRQ? */
+			if (irq->mp_srcbus == MP_ISA_BUS
+			    && irq->mp_srcbusirq == i)
+				break;
+
+			/* Do we already have a mapping for this IOAPIC pin */
+			if (irq->mp_dstapic == dstapic &&
+			    irq->mp_dstirq == i)
+				break;
+		}
+
+		if (idx != mp_irq_entries) {
+			printk(KERN_DEBUG "ACPI: IRQ%d used by override.\n", i);
+			continue;	/* IRQ already used */
+		}
+
+		mp_irq.mp_type = MP_INTSRC;
+		mp_irq.mp_irqflag = 0;	/* Conforming */
+		mp_irq.mp_srcbus = MP_ISA_BUS;
+		mp_irq.mp_dstapic = dstapic;
+		mp_irq.mp_irqtype = mp_INT;
+		mp_irq.mp_srcbusirq = i; /* Identity mapped */
+		mp_irq.mp_dstirq = i;
+
+		save_mp_irq(&mp_irq);
+	}
+}
+
+int mp_register_gsi(u32 gsi, int triggering, int polarity)
+{
+	int ioapic;
+	int ioapic_pin;
+#ifdef CONFIG_X86_32
+#define MAX_GSI_NUM	4096
+#define IRQ_COMPRESSION_START	64
+
+	static int pci_irq = IRQ_COMPRESSION_START;
+	/*
+	 * Mapping between Global System Interrupts, which
+	 * represent all possible interrupts, and IRQs
+	 * assigned to actual devices.
+	 */
+	static int gsi_to_irq[MAX_GSI_NUM];
+#else
+
+	if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC)
+		return gsi;
+#endif
+
+	/* Don't set up the ACPI SCI because it's already set up */
+	if (acpi_gbl_FADT.sci_interrupt == gsi)
+		return gsi;
+
+	ioapic = mp_find_ioapic(gsi);
+	if (ioapic < 0) {
+		printk(KERN_WARNING "No IOAPIC for GSI %u\n", gsi);
+		return gsi;
+	}
+
+	ioapic_pin = gsi - mp_ioapic_routing[ioapic].gsi_base;
+
+#ifdef CONFIG_X86_32
+	if (ioapic_renumber_irq)
+		gsi = ioapic_renumber_irq(ioapic, gsi);
+#endif
+
+	/*
+	 * Avoid pin reprogramming.  PRTs typically include entries
+	 * with redundant pin->gsi mappings (but unique PCI devices);
+	 * we only program the IOAPIC on the first.
+	 */
+	if (ioapic_pin > MP_MAX_IOAPIC_PIN) {
+		printk(KERN_ERR "Invalid reference to IOAPIC pin "
+		       "%d-%d\n", mp_ioapic_routing[ioapic].apic_id,
+		       ioapic_pin);
+		return gsi;
+	}
+	if (test_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed)) {
+		Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n",
+			mp_ioapic_routing[ioapic].apic_id, ioapic_pin);
+#ifdef CONFIG_X86_32
+		return (gsi < IRQ_COMPRESSION_START ? gsi : gsi_to_irq[gsi]);
+#else
+		return gsi;
+#endif
+	}
+
+	set_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed);
+#ifdef CONFIG_X86_32
+	/*
+	 * For GSI >= 64, use IRQ compression
+	 */
+	if ((gsi >= IRQ_COMPRESSION_START)
+	    && (triggering == ACPI_LEVEL_SENSITIVE)) {
+		/*
+		 * For PCI devices assign IRQs in order, avoiding gaps
+		 * due to unused I/O APIC pins.
+		 */
+		int irq = gsi;
+		if (gsi < MAX_GSI_NUM) {
+			/*
+			 * Retain the VIA chipset work-around (gsi > 15), but
+			 * avoid a problem where the 8254 timer (IRQ0) is setup
+			 * via an override (so it's not on pin 0 of the ioapic),
+			 * and at the same time, the pin 0 interrupt is a PCI
+			 * type.  The gsi > 15 test could cause these two pins
+			 * to be shared as IRQ0, and they are not shareable.
+			 * So test for this condition, and if necessary, avoid
+			 * the pin collision.
+			 */
+			gsi = pci_irq++;
+			/*
+			 * Don't assign IRQ used by ACPI SCI
+			 */
+			if (gsi == acpi_gbl_FADT.sci_interrupt)
+				gsi = pci_irq++;
+			gsi_to_irq[irq] = gsi;
+		} else {
+			printk(KERN_ERR "GSI %u is too high\n", gsi);
+			return gsi;
+		}
+	}
+#endif
+	io_apic_set_pci_routing(ioapic, ioapic_pin, gsi,
+				triggering == ACPI_EDGE_SENSITIVE ? 0 : 1,
+				polarity == ACPI_ACTIVE_HIGH ? 0 : 1);
+	return gsi;
+}
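On 32-bit kernels the function above compresses sparse level-triggered GSIs at or above IRQ_COMPRESSION_START into a dense IRQ range, recording each assignment in gsi_to_irq[] so that repeated PRT entries for the same pin resolve to the same IRQ. A simplified model of the assignment step (the SCI-avoidance and range checks are omitted):

	static int next_irq = IRQ_COMPRESSION_START;	/* == 64 */

	/* the first level-triggered GSI >= 64 seen (say 72) becomes
	 * IRQ 64, the next (say 89) becomes IRQ 65, and so on: */
	gsi_to_irq[gsi] = next_irq++;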
+
+int mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin,
+			u32 gsi, int triggering, int polarity)
+{
+#ifdef CONFIG_X86_MPPARSE
+	struct mp_config_intsrc mp_irq;
+	int ioapic;
+
+	if (!acpi_ioapic)
+		return 0;
+
+	/* construct the entry exactly as it would appear in an mptable */
+	mp_irq.mp_type = MP_INTSRC;
+	mp_irq.mp_irqtype = mp_INT;
+	mp_irq.mp_irqflag = (triggering == ACPI_EDGE_SENSITIVE ? 4 : 0x0c) |
+				(polarity == ACPI_ACTIVE_HIGH ? 1 : 3);
+	mp_irq.mp_srcbus = number;
+	mp_irq.mp_srcbusirq = (((devfn >> 3) & 0x1f) << 2) | ((pin - 1) & 3);
+	ioapic = mp_find_ioapic(gsi);
+	mp_irq.mp_dstapic = mp_ioapic_routing[ioapic].apic_id;
+	mp_irq.mp_dstirq = gsi - mp_ioapic_routing[ioapic].gsi_base;
+
+	save_mp_irq(&mp_irq);
+#endif
+	return 0;
+}
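The mp_srcbusirq value above encodes the interrupt source the way PCI entries in an mptable do: bits 6:2 carry the device number (devfn >> 3) and bits 1:0 carry the pin, with INTA#..INTD# mapping to 0..3. A worked example:

	/* device 3 using pin INTB# (pin == 2): */
	u8 srcbusirq = ((3 & 0x1f) << 2) | ((2 - 1) & 3);	/* == 0x0d */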
+
 /*
  * Parse IOAPIC related entries in MADT
  * returns 0 on success, < 0 on error
@@ -1009,8 +1353,6 @@ static void __init acpi_process_madt(void)
 	return;
 }
 
-#ifdef __i386__
-
 static int __init disable_acpi_irq(const struct dmi_system_id *d)
 {
 	if (!acpi_force) {
@@ -1061,6 +1403,17 @@ static int __init force_acpi_ht(const struct dmi_system_id *d)
 }
 
 /*
+ * Force ignoring BIOS IRQ0 pin2 override
+ */
+static int __init dmi_ignore_irq0_timer_override(const struct dmi_system_id *d)
+{
+	pr_notice("%s detected: Ignoring BIOS IRQ0 pin2 override\n", d->ident);
+	acpi_skip_timer_override = 1;
+	force_mask_ioapic_irq_2();
+	return 0;
+}
+
+/*
  * If your system is blacklisted here, but you find that acpi=force
  * works for you, please contact acpi-devel@sourceforge.net
  */
@@ -1227,11 +1580,35 @@ static struct dmi_system_id __initdata acpi_dmi_table[] = {
 		     DMI_MATCH(DMI_PRODUCT_NAME, "TravelMate 360"),
 		     },
 	 },
+	/*
+	 * HP laptops with a DSDT reporting as HP/SB400/10000 include
+	 * code that overrides all temperature trip points to 16C if
+	 * the INTIN2 input of the I/O APIC is enabled. This input is
+	 * incorrectly designated the ISA IRQ 0 via an interrupt source
+	 * override even though it is wired to the output of the master
+	 * 8259A, and INTIN0 is not connected at all. Force ignoring the
+	 * BIOS IRQ0 pin2 override in those cases.
+	 */
+	{
+	 .callback = dmi_ignore_irq0_timer_override,
+	 .ident = "HP NX6125 laptop",
+	 .matches = {
+		     DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
+		     DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq nx6125"),
+		     },
+	 },
+	{
+	 .callback = dmi_ignore_irq0_timer_override,
+	 .ident = "HP NX6325 laptop",
+	 .matches = {
+		     DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
+		     DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq nx6325"),
+		     },
+	 },
 	{}
 };
 
-#endif	/* __i386__ */
-
 /*
  * acpi_boot_table_init() and acpi_boot_init()
  *  called from setup_arch(), always.
@@ -1259,9 +1636,7 @@ int __init acpi_boot_table_init(void)
 {
 	int error;
 
-#ifdef __i386__
 	dmi_check_system(acpi_dmi_table);
-#endif
 
 	/*
 	 * If acpi_disabled, bail out
@@ -1386,6 +1761,20 @@ static int __init parse_pci(char *arg)
 }
 early_param("pci", parse_pci);
 
+int __init acpi_mps_check(void)
+{
+#if defined(CONFIG_X86_LOCAL_APIC) && !defined(CONFIG_X86_MPPARSE)
+	/* mptable code is not built in */
+	if (acpi_disabled || acpi_noirq) {
+		printk(KERN_WARNING "MPS support code is not built-in.\n"
+		       "Using acpi=off or acpi=noirq or pci=noacpi "
+		       "may cause problems\n");
+		return 1;
+	}
+#endif
+	return 0;
+}
+
 #ifdef CONFIG_X86_IO_APIC
 static int __init parse_acpi_skip_timer_override(char *arg)
 {
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index 36af01f029ed..e6a4b564ccaa 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -86,7 +86,9 @@ int acpi_save_state_mem(void)
 	saved_magic = 0x12345678;
 #else /* CONFIG_64BIT */
 	header->trampoline_segment = setup_trampoline() >> 4;
-	init_rsp = (unsigned long)temp_stack + 4096;
+#ifdef CONFIG_SMP
+	stack_start.sp = temp_stack + 4096;
+#endif
 	initial_code = (unsigned long)wakeup_long64;
 	saved_magic = 0x123456789abcdef0;
 #endif /* CONFIG_64BIT */
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
new file mode 100644
index 000000000000..f2766d84c7a0
--- /dev/null
+++ b/arch/x86/kernel/amd_iommu.c
@@ -0,0 +1,962 @@
1/*
2 * Copyright (C) 2007-2008 Advanced Micro Devices, Inc.
3 * Author: Joerg Roedel <joerg.roedel@amd.com>
4 * Leo Duran <leo.duran@amd.com>
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published
8 * by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */
19
20#include <linux/pci.h>
21#include <linux/gfp.h>
22#include <linux/bitops.h>
23#include <linux/scatterlist.h>
24#include <linux/iommu-helper.h>
25#include <asm/proto.h>
26#include <asm/gart.h>
27#include <asm/amd_iommu_types.h>
28#include <asm/amd_iommu.h>
29
30#define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28))
31
32#define to_pages(addr, size) \
33 (round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT)
34
35static DEFINE_RWLOCK(amd_iommu_devtable_lock);
36
37struct command {
38 u32 data[4];
39};
40
41static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
42 struct unity_map_entry *e);
43
44static int iommu_has_npcache(struct amd_iommu *iommu)
45{
46 return iommu->cap & IOMMU_CAP_NPCACHE;
47}
48
49static int __iommu_queue_command(struct amd_iommu *iommu, struct command *cmd)
50{
51 u32 tail, head;
52 u8 *target;
53
54 tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
55 target = (iommu->cmd_buf + tail);
56 memcpy_toio(target, cmd, sizeof(*cmd));
57 tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size;
58 head = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
59 if (tail == head)
60 return -ENOMEM;
61 writel(tail, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
62
63 return 0;
64}
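The command buffer is a ring: the tail index wraps modulo cmd_buf_size, and the ring is treated as full when the advanced tail would land on the head, in which case the hardware-visible tail register is left untouched. Worked numbers, using the 16-byte struct command above and a hypothetical 512-byte ring:

	/* tail == 496: the next slot is (496 + 16) % 512 == 0; if head
	 * is still 0 the ring is full and -ENOMEM is returned before
	 * the MMIO tail register is updated. */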
65
66static int iommu_queue_command(struct amd_iommu *iommu, struct command *cmd)
67{
68 unsigned long flags;
69 int ret;
70
71 spin_lock_irqsave(&iommu->lock, flags);
72 ret = __iommu_queue_command(iommu, cmd);
73 spin_unlock_irqrestore(&iommu->lock, flags);
74
75 return ret;
76}
77
78static int iommu_completion_wait(struct amd_iommu *iommu)
79{
80 int ret;
81 struct command cmd;
82 volatile u64 ready = 0;
83 unsigned long ready_phys = virt_to_phys(&ready);
84
85 memset(&cmd, 0, sizeof(cmd));
86 cmd.data[0] = LOW_U32(ready_phys) | CMD_COMPL_WAIT_STORE_MASK;
87 cmd.data[1] = HIGH_U32(ready_phys);
88 cmd.data[2] = 1; /* value written to 'ready' */
89 CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT);
90
91 iommu->need_sync = 0;
92
93 ret = iommu_queue_command(iommu, &cmd);
94
95 if (ret)
96 return ret;
97
98 while (!ready)
99 cpu_relax();
100
101 return 0;
102}
103
104static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid)
105{
106 struct command cmd;
107
108 BUG_ON(iommu == NULL);
109
110 memset(&cmd, 0, sizeof(cmd));
111 CMD_SET_TYPE(&cmd, CMD_INV_DEV_ENTRY);
112 cmd.data[0] = devid;
113
114 iommu->need_sync = 1;
115
116 return iommu_queue_command(iommu, &cmd);
117}
118
119static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu,
120 u64 address, u16 domid, int pde, int s)
121{
122 struct command cmd;
123
124 memset(&cmd, 0, sizeof(cmd));
125 address &= PAGE_MASK;
126 CMD_SET_TYPE(&cmd, CMD_INV_IOMMU_PAGES);
127 cmd.data[1] |= domid;
128 cmd.data[2] = LOW_U32(address);
129 cmd.data[3] = HIGH_U32(address);
130 if (s)
131 cmd.data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
132 if (pde)
133 cmd.data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK;
134
135 iommu->need_sync = 1;
136
137 return iommu_queue_command(iommu, &cmd);
138}
139
140static int iommu_flush_pages(struct amd_iommu *iommu, u16 domid,
141 u64 address, size_t size)
142{
143 int s = 0;
144 unsigned pages = to_pages(address, size);
145
146 address &= PAGE_MASK;
147
148 if (pages > 1) {
149 /*
150 * If we have to flush more than one page, flush all
151 * TLB entries for this domain
152 */
153 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS;
154 s = 1;
155 }
156
157 iommu_queue_inv_iommu_pages(iommu, address, domid, 0, s);
158
159 return 0;
160}
161
162static int iommu_map(struct protection_domain *dom,
163 unsigned long bus_addr,
164 unsigned long phys_addr,
165 int prot)
166{
167 u64 __pte, *pte, *page;
168
169 bus_addr = PAGE_ALIGN(bus_addr);
170	phys_addr = PAGE_ALIGN(phys_addr);
171
172 /* only support 512GB address spaces for now */
173 if (bus_addr > IOMMU_MAP_SIZE_L3 || !(prot & IOMMU_PROT_MASK))
174 return -EINVAL;
175
176 pte = &dom->pt_root[IOMMU_PTE_L2_INDEX(bus_addr)];
177
178 if (!IOMMU_PTE_PRESENT(*pte)) {
179 page = (u64 *)get_zeroed_page(GFP_KERNEL);
180 if (!page)
181 return -ENOMEM;
182 *pte = IOMMU_L2_PDE(virt_to_phys(page));
183 }
184
185 pte = IOMMU_PTE_PAGE(*pte);
186 pte = &pte[IOMMU_PTE_L1_INDEX(bus_addr)];
187
188 if (!IOMMU_PTE_PRESENT(*pte)) {
189 page = (u64 *)get_zeroed_page(GFP_KERNEL);
190 if (!page)
191 return -ENOMEM;
192 *pte = IOMMU_L1_PDE(virt_to_phys(page));
193 }
194
195 pte = IOMMU_PTE_PAGE(*pte);
196 pte = &pte[IOMMU_PTE_L0_INDEX(bus_addr)];
197
198 if (IOMMU_PTE_PRESENT(*pte))
199 return -EBUSY;
200
201 __pte = phys_addr | IOMMU_PTE_P;
202 if (prot & IOMMU_PROT_IR)
203 __pte |= IOMMU_PTE_IR;
204 if (prot & IOMMU_PROT_IW)
205 __pte |= IOMMU_PTE_IW;
206
207 *pte = __pte;
208
209 return 0;
210}
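iommu_map() walks, and lazily allocates, a 3-level page table. The index macros come from amd_iommu_types.h and are not shown in this patch; with 4k pages and 512 eight-byte entries per level they presumably reduce to one 9-bit field per level, which matches the 512 GB (2^39 byte) limit checked at the top of the function:

	/* hypothetical expansions, for illustration only */
	#define IOMMU_PTE_L2_INDEX(addr)	(((addr) >> 30) & 0x1ffULL)	/* 1 GB per entry */
	#define IOMMU_PTE_L1_INDEX(addr)	(((addr) >> 21) & 0x1ffULL)	/* 2 MB per entry */
	#define IOMMU_PTE_L0_INDEX(addr)	(((addr) >> 12) & 0x1ffULL)	/* 4 kB pages */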
211
212static int iommu_for_unity_map(struct amd_iommu *iommu,
213 struct unity_map_entry *entry)
214{
215 u16 bdf, i;
216
217 for (i = entry->devid_start; i <= entry->devid_end; ++i) {
218 bdf = amd_iommu_alias_table[i];
219 if (amd_iommu_rlookup_table[bdf] == iommu)
220 return 1;
221 }
222
223 return 0;
224}
225
226static int iommu_init_unity_mappings(struct amd_iommu *iommu)
227{
228 struct unity_map_entry *entry;
229 int ret;
230
231 list_for_each_entry(entry, &amd_iommu_unity_map, list) {
232 if (!iommu_for_unity_map(iommu, entry))
233 continue;
234 ret = dma_ops_unity_map(iommu->default_dom, entry);
235 if (ret)
236 return ret;
237 }
238
239 return 0;
240}
241
242static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
243 struct unity_map_entry *e)
244{
245 u64 addr;
246 int ret;
247
248 for (addr = e->address_start; addr < e->address_end;
249 addr += PAGE_SIZE) {
250 ret = iommu_map(&dma_dom->domain, addr, addr, e->prot);
251 if (ret)
252 return ret;
253 /*
254 * if unity mapping is in aperture range mark the page
255 * as allocated in the aperture
256 */
257 if (addr < dma_dom->aperture_size)
258 __set_bit(addr >> PAGE_SHIFT, dma_dom->bitmap);
259 }
260
261 return 0;
262}
263
264static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom,
265 u16 devid)
266{
267 struct unity_map_entry *e;
268 int ret;
269
270 list_for_each_entry(e, &amd_iommu_unity_map, list) {
271 if (!(devid >= e->devid_start && devid <= e->devid_end))
272 continue;
273 ret = dma_ops_unity_map(dma_dom, e);
274 if (ret)
275 return ret;
276 }
277
278 return 0;
279}
280
281static unsigned long dma_mask_to_pages(unsigned long mask)
282{
283 return (mask >> PAGE_SHIFT) +
284 (PAGE_ALIGN(mask & ~PAGE_MASK) >> PAGE_SHIFT);
285}
286
287static unsigned long dma_ops_alloc_addresses(struct device *dev,
288 struct dma_ops_domain *dom,
289 unsigned int pages)
290{
291 unsigned long limit = dma_mask_to_pages(*dev->dma_mask);
292 unsigned long address;
293 unsigned long size = dom->aperture_size >> PAGE_SHIFT;
294 unsigned long boundary_size;
295
296 boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
297 PAGE_SIZE) >> PAGE_SHIFT;
298 limit = limit < size ? limit : size;
299
300 if (dom->next_bit >= limit)
301 dom->next_bit = 0;
302
303 address = iommu_area_alloc(dom->bitmap, limit, dom->next_bit, pages,
304 0 , boundary_size, 0);
305 if (address == -1)
306 address = iommu_area_alloc(dom->bitmap, limit, 0, pages,
307 0, boundary_size, 0);
308
309 if (likely(address != -1)) {
310 dom->next_bit = address + pages;
311 address <<= PAGE_SHIFT;
312 } else
313 address = bad_dma_address;
314
315 WARN_ON((address + (PAGE_SIZE*pages)) > dom->aperture_size);
316
317 return address;
318}
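Address allocation here is next-fit with a single wrap-around retry: the search starts at dom->next_bit and restarts from bit 0 only if nothing fits before the limit. The same policy as a standalone sketch, with the generic bitmap helpers standing in for iommu_area_alloc() (which additionally honours the device's segment boundary):

	static unsigned long next_fit_alloc(unsigned long *bitmap,
					    unsigned long limit,
					    unsigned long *next_bit,
					    unsigned int pages)
	{
		unsigned long addr;

		/* scan [*next_bit, limit) first ... */
		addr = bitmap_find_next_zero_area(bitmap, limit, *next_bit, pages, 0);
		if (addr >= limit)	/* ... then wrap and retry once from 0 */
			addr = bitmap_find_next_zero_area(bitmap, limit, 0, pages, 0);
		if (addr >= limit)
			return -1;	/* aperture exhausted */
		bitmap_set(bitmap, addr, pages);
		*next_bit = addr + pages;
		return addr;	/* page index; the caller shifts by PAGE_SHIFT */
	}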
319
320static void dma_ops_free_addresses(struct dma_ops_domain *dom,
321 unsigned long address,
322 unsigned int pages)
323{
324 address >>= PAGE_SHIFT;
325 iommu_area_free(dom->bitmap, address, pages);
326}
327
328static u16 domain_id_alloc(void)
329{
330 unsigned long flags;
331 int id;
332
333 write_lock_irqsave(&amd_iommu_devtable_lock, flags);
334 id = find_first_zero_bit(amd_iommu_pd_alloc_bitmap, MAX_DOMAIN_ID);
335 BUG_ON(id == 0);
336 if (id > 0 && id < MAX_DOMAIN_ID)
337 __set_bit(id, amd_iommu_pd_alloc_bitmap);
338 else
339 id = 0;
340 write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
341
342 return id;
343}
344
345static void dma_ops_reserve_addresses(struct dma_ops_domain *dom,
346 unsigned long start_page,
347 unsigned int pages)
348{
349 unsigned int last_page = dom->aperture_size >> PAGE_SHIFT;
350
351 if (start_page + pages > last_page)
352 pages = last_page - start_page;
353
354 set_bit_string(dom->bitmap, start_page, pages);
355}
356
357static void dma_ops_free_pagetable(struct dma_ops_domain *dma_dom)
358{
359 int i, j;
360 u64 *p1, *p2, *p3;
361
362 p1 = dma_dom->domain.pt_root;
363
364 if (!p1)
365 return;
366
367 for (i = 0; i < 512; ++i) {
368 if (!IOMMU_PTE_PRESENT(p1[i]))
369 continue;
370
371 p2 = IOMMU_PTE_PAGE(p1[i]);
372		for (j = 0; j < 512; ++j) {
373 if (!IOMMU_PTE_PRESENT(p2[j]))
374 continue;
375 p3 = IOMMU_PTE_PAGE(p2[j]);
376 free_page((unsigned long)p3);
377 }
378
379 free_page((unsigned long)p2);
380 }
381
382 free_page((unsigned long)p1);
383}
384
385static void dma_ops_domain_free(struct dma_ops_domain *dom)
386{
387 if (!dom)
388 return;
389
390 dma_ops_free_pagetable(dom);
391
392 kfree(dom->pte_pages);
393
394 kfree(dom->bitmap);
395
396 kfree(dom);
397}
398
399static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu,
400 unsigned order)
401{
402 struct dma_ops_domain *dma_dom;
403 unsigned i, num_pte_pages;
404 u64 *l2_pde;
405 u64 address;
406
407 /*
408 * Currently the DMA aperture must be between 32 MB and 1GB in size
409 */
410 if ((order < 25) || (order > 30))
411 return NULL;
412
413 dma_dom = kzalloc(sizeof(struct dma_ops_domain), GFP_KERNEL);
414 if (!dma_dom)
415 return NULL;
416
417 spin_lock_init(&dma_dom->domain.lock);
418
419 dma_dom->domain.id = domain_id_alloc();
420 if (dma_dom->domain.id == 0)
421 goto free_dma_dom;
422 dma_dom->domain.mode = PAGE_MODE_3_LEVEL;
423 dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL);
424 dma_dom->domain.priv = dma_dom;
425 if (!dma_dom->domain.pt_root)
426 goto free_dma_dom;
427 dma_dom->aperture_size = (1ULL << order);
428 dma_dom->bitmap = kzalloc(dma_dom->aperture_size / (PAGE_SIZE * 8),
429 GFP_KERNEL);
430 if (!dma_dom->bitmap)
431 goto free_dma_dom;
432 /*
433	 * Mark the first page as allocated so we never return 0 as
434	 * a valid dma address; this lets us use 0 as an error value.
435 */
436 dma_dom->bitmap[0] = 1;
437 dma_dom->next_bit = 0;
438
439 if (iommu->exclusion_start &&
440 iommu->exclusion_start < dma_dom->aperture_size) {
441 unsigned long startpage = iommu->exclusion_start >> PAGE_SHIFT;
442 int pages = to_pages(iommu->exclusion_start,
443 iommu->exclusion_length);
444 dma_ops_reserve_addresses(dma_dom, startpage, pages);
445 }
446
447 num_pte_pages = dma_dom->aperture_size / (PAGE_SIZE * 512);
448 dma_dom->pte_pages = kzalloc(num_pte_pages * sizeof(void *),
449 GFP_KERNEL);
450 if (!dma_dom->pte_pages)
451 goto free_dma_dom;
452
453 l2_pde = (u64 *)get_zeroed_page(GFP_KERNEL);
454 if (l2_pde == NULL)
455 goto free_dma_dom;
456
457 dma_dom->domain.pt_root[0] = IOMMU_L2_PDE(virt_to_phys(l2_pde));
458
459 for (i = 0; i < num_pte_pages; ++i) {
460 dma_dom->pte_pages[i] = (u64 *)get_zeroed_page(GFP_KERNEL);
461 if (!dma_dom->pte_pages[i])
462 goto free_dma_dom;
463 address = virt_to_phys(dma_dom->pte_pages[i]);
464 l2_pde[i] = IOMMU_L1_PDE(address);
465 }
466
467 return dma_dom;
468
469free_dma_dom:
470 dma_ops_domain_free(dma_dom);
471
472 return NULL;
473}
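For the default aperture order of 26 (the amd_iommu_aperture_order initializer in amd_iommu_init.c below), the sizes allocated in this function work out as:

	/* aperture_size = 1 << 26           = 64 MB = 16384 4k pages
	 * bitmap        = 16384 bits / 8    = 2 kB
	 * num_pte_pages = 16384 / 512       = 32 leaf page-table pages,
	 * each leaf page holding 512 u64 PTEs covering 2 MB of DMA space. */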
474
475static struct protection_domain *domain_for_device(u16 devid)
476{
477 struct protection_domain *dom;
478 unsigned long flags;
479
480 read_lock_irqsave(&amd_iommu_devtable_lock, flags);
481 dom = amd_iommu_pd_table[devid];
482 read_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
483
484 return dom;
485}
486
487static void set_device_domain(struct amd_iommu *iommu,
488 struct protection_domain *domain,
489 u16 devid)
490{
491 unsigned long flags;
492
493 u64 pte_root = virt_to_phys(domain->pt_root);
494
495 pte_root |= (domain->mode & 0x07) << 9;
496 pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | 2;
497
498 write_lock_irqsave(&amd_iommu_devtable_lock, flags);
499 amd_iommu_dev_table[devid].data[0] = pte_root;
500 amd_iommu_dev_table[devid].data[1] = pte_root >> 32;
501 amd_iommu_dev_table[devid].data[2] = domain->id;
502
503 amd_iommu_pd_table[devid] = domain;
504 write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
505
506 iommu_queue_inv_dev_entry(iommu, devid);
507
508 iommu->need_sync = 1;
509}
510
511static int get_device_resources(struct device *dev,
512 struct amd_iommu **iommu,
513 struct protection_domain **domain,
514 u16 *bdf)
515{
516 struct dma_ops_domain *dma_dom;
517 struct pci_dev *pcidev;
518 u16 _bdf;
519
520 BUG_ON(!dev || dev->bus != &pci_bus_type || !dev->dma_mask);
521
522 pcidev = to_pci_dev(dev);
523 _bdf = (pcidev->bus->number << 8) | pcidev->devfn;
524
525 if (_bdf >= amd_iommu_last_bdf) {
526 *iommu = NULL;
527 *domain = NULL;
528 *bdf = 0xffff;
529 return 0;
530 }
531
532 *bdf = amd_iommu_alias_table[_bdf];
533
534 *iommu = amd_iommu_rlookup_table[*bdf];
535 if (*iommu == NULL)
536 return 0;
537 dma_dom = (*iommu)->default_dom;
538 *domain = domain_for_device(*bdf);
539 if (*domain == NULL) {
540 *domain = &dma_dom->domain;
541 set_device_domain(*iommu, *domain, *bdf);
542 printk(KERN_INFO "AMD IOMMU: Using protection domain %d for "
543 "device ", (*domain)->id);
544 print_devid(_bdf, 1);
545 }
546
547 return 1;
548}
549
550static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu,
551 struct dma_ops_domain *dom,
552 unsigned long address,
553 phys_addr_t paddr,
554 int direction)
555{
556 u64 *pte, __pte;
557
558 WARN_ON(address > dom->aperture_size);
559
560 paddr &= PAGE_MASK;
561
562 pte = dom->pte_pages[IOMMU_PTE_L1_INDEX(address)];
563 pte += IOMMU_PTE_L0_INDEX(address);
564
565 __pte = paddr | IOMMU_PTE_P | IOMMU_PTE_FC;
566
567 if (direction == DMA_TO_DEVICE)
568 __pte |= IOMMU_PTE_IR;
569 else if (direction == DMA_FROM_DEVICE)
570 __pte |= IOMMU_PTE_IW;
571 else if (direction == DMA_BIDIRECTIONAL)
572 __pte |= IOMMU_PTE_IR | IOMMU_PTE_IW;
573
574 WARN_ON(*pte);
575
576 *pte = __pte;
577
578 return (dma_addr_t)address;
579}
580
581static void dma_ops_domain_unmap(struct amd_iommu *iommu,
582 struct dma_ops_domain *dom,
583 unsigned long address)
584{
585 u64 *pte;
586
587 if (address >= dom->aperture_size)
588 return;
589
590 WARN_ON(address & 0xfffULL || address > dom->aperture_size);
591
592 pte = dom->pte_pages[IOMMU_PTE_L1_INDEX(address)];
593 pte += IOMMU_PTE_L0_INDEX(address);
594
595 WARN_ON(!*pte);
596
597 *pte = 0ULL;
598}
599
600static dma_addr_t __map_single(struct device *dev,
601 struct amd_iommu *iommu,
602 struct dma_ops_domain *dma_dom,
603 phys_addr_t paddr,
604 size_t size,
605 int dir)
606{
607 dma_addr_t offset = paddr & ~PAGE_MASK;
608 dma_addr_t address, start;
609 unsigned int pages;
610 int i;
611
612 pages = to_pages(paddr, size);
613 paddr &= PAGE_MASK;
614
615 address = dma_ops_alloc_addresses(dev, dma_dom, pages);
616 if (unlikely(address == bad_dma_address))
617 goto out;
618
619 start = address;
620 for (i = 0; i < pages; ++i) {
621 dma_ops_domain_map(iommu, dma_dom, start, paddr, dir);
622 paddr += PAGE_SIZE;
623 start += PAGE_SIZE;
624 }
625 address += offset;
626
627out:
628 return address;
629}
630
631static void __unmap_single(struct amd_iommu *iommu,
632 struct dma_ops_domain *dma_dom,
633 dma_addr_t dma_addr,
634 size_t size,
635 int dir)
636{
637 dma_addr_t i, start;
638 unsigned int pages;
639
640 if ((dma_addr == 0) || (dma_addr + size > dma_dom->aperture_size))
641 return;
642
643 pages = to_pages(dma_addr, size);
644 dma_addr &= PAGE_MASK;
645 start = dma_addr;
646
647 for (i = 0; i < pages; ++i) {
648 dma_ops_domain_unmap(iommu, dma_dom, start);
649 start += PAGE_SIZE;
650 }
651
652 dma_ops_free_addresses(dma_dom, dma_addr, pages);
653}
654
655static dma_addr_t map_single(struct device *dev, phys_addr_t paddr,
656 size_t size, int dir)
657{
658 unsigned long flags;
659 struct amd_iommu *iommu;
660 struct protection_domain *domain;
661 u16 devid;
662 dma_addr_t addr;
663
664 get_device_resources(dev, &iommu, &domain, &devid);
665
666 if (iommu == NULL || domain == NULL)
667 return (dma_addr_t)paddr;
668
669 spin_lock_irqsave(&domain->lock, flags);
670 addr = __map_single(dev, iommu, domain->priv, paddr, size, dir);
671 if (addr == bad_dma_address)
672 goto out;
673
674 if (iommu_has_npcache(iommu))
675 iommu_flush_pages(iommu, domain->id, addr, size);
676
677 if (iommu->need_sync)
678 iommu_completion_wait(iommu);
679
680out:
681 spin_unlock_irqrestore(&domain->lock, flags);
682
683 return addr;
684}
685
686static void unmap_single(struct device *dev, dma_addr_t dma_addr,
687 size_t size, int dir)
688{
689 unsigned long flags;
690 struct amd_iommu *iommu;
691 struct protection_domain *domain;
692 u16 devid;
693
694 if (!get_device_resources(dev, &iommu, &domain, &devid))
695 return;
696
697 spin_lock_irqsave(&domain->lock, flags);
698
699 __unmap_single(iommu, domain->priv, dma_addr, size, dir);
700
701 iommu_flush_pages(iommu, domain->id, dma_addr, size);
702
703 if (iommu->need_sync)
704 iommu_completion_wait(iommu);
705
706 spin_unlock_irqrestore(&domain->lock, flags);
707}
708
709static int map_sg_no_iommu(struct device *dev, struct scatterlist *sglist,
710 int nelems, int dir)
711{
712 struct scatterlist *s;
713 int i;
714
715 for_each_sg(sglist, s, nelems, i) {
716 s->dma_address = (dma_addr_t)sg_phys(s);
717 s->dma_length = s->length;
718 }
719
720 return nelems;
721}
722
723static int map_sg(struct device *dev, struct scatterlist *sglist,
724 int nelems, int dir)
725{
726 unsigned long flags;
727 struct amd_iommu *iommu;
728 struct protection_domain *domain;
729 u16 devid;
730 int i;
731 struct scatterlist *s;
732 phys_addr_t paddr;
733 int mapped_elems = 0;
734
735 get_device_resources(dev, &iommu, &domain, &devid);
736
737 if (!iommu || !domain)
738 return map_sg_no_iommu(dev, sglist, nelems, dir);
739
740 spin_lock_irqsave(&domain->lock, flags);
741
742 for_each_sg(sglist, s, nelems, i) {
743 paddr = sg_phys(s);
744
745 s->dma_address = __map_single(dev, iommu, domain->priv,
746 paddr, s->length, dir);
747
748 if (s->dma_address) {
749 s->dma_length = s->length;
750 mapped_elems++;
751 } else
752 goto unmap;
753 if (iommu_has_npcache(iommu))
754 iommu_flush_pages(iommu, domain->id, s->dma_address,
755 s->dma_length);
756 }
757
758 if (iommu->need_sync)
759 iommu_completion_wait(iommu);
760
761out:
762 spin_unlock_irqrestore(&domain->lock, flags);
763
764 return mapped_elems;
765unmap:
766 for_each_sg(sglist, s, mapped_elems, i) {
767 if (s->dma_address)
768 __unmap_single(iommu, domain->priv, s->dma_address,
769 s->dma_length, dir);
770 s->dma_address = s->dma_length = 0;
771 }
772
773 mapped_elems = 0;
774
775 goto out;
776}
777
778static void unmap_sg(struct device *dev, struct scatterlist *sglist,
779 int nelems, int dir)
780{
781 unsigned long flags;
782 struct amd_iommu *iommu;
783 struct protection_domain *domain;
784 struct scatterlist *s;
785 u16 devid;
786 int i;
787
788 if (!get_device_resources(dev, &iommu, &domain, &devid))
789 return;
790
791 spin_lock_irqsave(&domain->lock, flags);
792
793 for_each_sg(sglist, s, nelems, i) {
794 __unmap_single(iommu, domain->priv, s->dma_address,
795 s->dma_length, dir);
796 iommu_flush_pages(iommu, domain->id, s->dma_address,
797 s->dma_length);
798 s->dma_address = s->dma_length = 0;
799 }
800
801 if (iommu->need_sync)
802 iommu_completion_wait(iommu);
803
804 spin_unlock_irqrestore(&domain->lock, flags);
805}
806
807static void *alloc_coherent(struct device *dev, size_t size,
808 dma_addr_t *dma_addr, gfp_t flag)
809{
810 unsigned long flags;
811 void *virt_addr;
812 struct amd_iommu *iommu;
813 struct protection_domain *domain;
814 u16 devid;
815 phys_addr_t paddr;
816
817 virt_addr = (void *)__get_free_pages(flag, get_order(size));
818 if (!virt_addr)
819		return NULL;
820
821 memset(virt_addr, 0, size);
822 paddr = virt_to_phys(virt_addr);
823
824 get_device_resources(dev, &iommu, &domain, &devid);
825
826 if (!iommu || !domain) {
827 *dma_addr = (dma_addr_t)paddr;
828 return virt_addr;
829 }
830
831 spin_lock_irqsave(&domain->lock, flags);
832
833 *dma_addr = __map_single(dev, iommu, domain->priv, paddr,
834 size, DMA_BIDIRECTIONAL);
835
836 if (*dma_addr == bad_dma_address) {
837 free_pages((unsigned long)virt_addr, get_order(size));
838 virt_addr = NULL;
839 goto out;
840 }
841
842 if (iommu_has_npcache(iommu))
843 iommu_flush_pages(iommu, domain->id, *dma_addr, size);
844
845 if (iommu->need_sync)
846 iommu_completion_wait(iommu);
847
848out:
849 spin_unlock_irqrestore(&domain->lock, flags);
850
851 return virt_addr;
852}
853
854static void free_coherent(struct device *dev, size_t size,
855 void *virt_addr, dma_addr_t dma_addr)
856{
857 unsigned long flags;
858 struct amd_iommu *iommu;
859 struct protection_domain *domain;
860 u16 devid;
861
862 get_device_resources(dev, &iommu, &domain, &devid);
863
864 if (!iommu || !domain)
865 goto free_mem;
866
867 spin_lock_irqsave(&domain->lock, flags);
868
869 __unmap_single(iommu, domain->priv, dma_addr, size, DMA_BIDIRECTIONAL);
870 iommu_flush_pages(iommu, domain->id, dma_addr, size);
871
872 if (iommu->need_sync)
873 iommu_completion_wait(iommu);
874
875 spin_unlock_irqrestore(&domain->lock, flags);
876
877free_mem:
878 free_pages((unsigned long)virt_addr, get_order(size));
879}
880
881/*
882 * If the driver core informs the DMA layer if a driver grabs a device
883 * we don't need to preallocate the protection domains anymore.
884 * For now we have to.
885 */
886void prealloc_protection_domains(void)
887{
888 struct pci_dev *dev = NULL;
889 struct dma_ops_domain *dma_dom;
890 struct amd_iommu *iommu;
891 int order = amd_iommu_aperture_order;
892 u16 devid;
893
894 while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
895 devid = (dev->bus->number << 8) | dev->devfn;
896 if (devid >= amd_iommu_last_bdf)
897 continue;
898 devid = amd_iommu_alias_table[devid];
899 if (domain_for_device(devid))
900 continue;
901 iommu = amd_iommu_rlookup_table[devid];
902 if (!iommu)
903 continue;
904 dma_dom = dma_ops_domain_alloc(iommu, order);
905 if (!dma_dom)
906 continue;
907 init_unity_mappings_for_device(dma_dom, devid);
908 set_device_domain(iommu, &dma_dom->domain, devid);
909 printk(KERN_INFO "AMD IOMMU: Allocated domain %d for device ",
910 dma_dom->domain.id);
911 print_devid(devid, 1);
912 }
913}
914
915static struct dma_mapping_ops amd_iommu_dma_ops = {
916 .alloc_coherent = alloc_coherent,
917 .free_coherent = free_coherent,
918 .map_single = map_single,
919 .unmap_single = unmap_single,
920 .map_sg = map_sg,
921 .unmap_sg = unmap_sg,
922};
923
924int __init amd_iommu_init_dma_ops(void)
925{
926 struct amd_iommu *iommu;
927 int order = amd_iommu_aperture_order;
928 int ret;
929
930 list_for_each_entry(iommu, &amd_iommu_list, list) {
931 iommu->default_dom = dma_ops_domain_alloc(iommu, order);
932 if (iommu->default_dom == NULL)
933 return -ENOMEM;
934 ret = iommu_init_unity_mappings(iommu);
935 if (ret)
936 goto free_domains;
937 }
938
939 if (amd_iommu_isolate)
940 prealloc_protection_domains();
941
942 iommu_detected = 1;
943 force_iommu = 1;
944 bad_dma_address = 0;
945#ifdef CONFIG_GART_IOMMU
946 gart_iommu_aperture_disabled = 1;
947 gart_iommu_aperture = 0;
948#endif
949
950 dma_ops = &amd_iommu_dma_ops;
951
952 return 0;
953
954free_domains:
955
956 list_for_each_entry(iommu, &amd_iommu_list, list) {
957 if (iommu->default_dom)
958 dma_ops_domain_free(iommu->default_dom);
959 }
960
961 return ret;
962}
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
new file mode 100644
index 000000000000..2a13e430437d
--- /dev/null
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -0,0 +1,875 @@
1/*
2 * Copyright (C) 2007-2008 Advanced Micro Devices, Inc.
3 * Author: Joerg Roedel <joerg.roedel@amd.com>
4 * Leo Duran <leo.duran@amd.com>
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published
8 * by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */
19
20#include <linux/pci.h>
21#include <linux/acpi.h>
22#include <linux/gfp.h>
23#include <linux/list.h>
24#include <linux/sysdev.h>
25#include <asm/pci-direct.h>
26#include <asm/amd_iommu_types.h>
27#include <asm/amd_iommu.h>
28#include <asm/gart.h>
29
30/*
31 * definitions for the ACPI scanning code
32 */
33#define UPDATE_LAST_BDF(x) do {\
34 if ((x) > amd_iommu_last_bdf) \
35 amd_iommu_last_bdf = (x); \
36	} while (0)
37
38#define DEVID(bus, devfn) (((bus) << 8) | (devfn))
39#define PCI_BUS(x) (((x) >> 8) & 0xff)
40#define IVRS_HEADER_LENGTH 48
41#define TBL_SIZE(x) (1 << (PAGE_SHIFT + get_order(amd_iommu_last_bdf * (x))))
42
43#define ACPI_IVHD_TYPE 0x10
44#define ACPI_IVMD_TYPE_ALL 0x20
45#define ACPI_IVMD_TYPE 0x21
46#define ACPI_IVMD_TYPE_RANGE 0x22
47
48#define IVHD_DEV_ALL 0x01
49#define IVHD_DEV_SELECT 0x02
50#define IVHD_DEV_SELECT_RANGE_START 0x03
51#define IVHD_DEV_RANGE_END 0x04
52#define IVHD_DEV_ALIAS 0x42
53#define IVHD_DEV_ALIAS_RANGE 0x43
54#define IVHD_DEV_EXT_SELECT 0x46
55#define IVHD_DEV_EXT_SELECT_RANGE 0x47
56
57#define IVHD_FLAG_HT_TUN_EN 0x00
58#define IVHD_FLAG_PASSPW_EN 0x01
59#define IVHD_FLAG_RESPASSPW_EN 0x02
60#define IVHD_FLAG_ISOC_EN 0x03
61
62#define IVMD_FLAG_EXCL_RANGE 0x08
63#define IVMD_FLAG_UNITY_MAP 0x01
64
65#define ACPI_DEVFLAG_INITPASS 0x01
66#define ACPI_DEVFLAG_EXTINT 0x02
67#define ACPI_DEVFLAG_NMI 0x04
68#define ACPI_DEVFLAG_SYSMGT1 0x10
69#define ACPI_DEVFLAG_SYSMGT2 0x20
70#define ACPI_DEVFLAG_LINT0 0x40
71#define ACPI_DEVFLAG_LINT1 0x80
72#define ACPI_DEVFLAG_ATSDIS 0x10000000
73
74struct ivhd_header {
75 u8 type;
76 u8 flags;
77 u16 length;
78 u16 devid;
79 u16 cap_ptr;
80 u64 mmio_phys;
81 u16 pci_seg;
82 u16 info;
83 u32 reserved;
84} __attribute__((packed));
85
86struct ivhd_entry {
87 u8 type;
88 u16 devid;
89 u8 flags;
90 u32 ext;
91} __attribute__((packed));
92
93struct ivmd_header {
94 u8 type;
95 u8 flags;
96 u16 length;
97 u16 devid;
98 u16 aux;
99 u64 resv;
100 u64 range_start;
101 u64 range_length;
102} __attribute__((packed));
103
104static int __initdata amd_iommu_detected;
105
106u16 amd_iommu_last_bdf;
107struct list_head amd_iommu_unity_map;
108unsigned amd_iommu_aperture_order = 26;
109int amd_iommu_isolate;
110
111struct list_head amd_iommu_list;
112struct dev_table_entry *amd_iommu_dev_table;
113u16 *amd_iommu_alias_table;
114struct amd_iommu **amd_iommu_rlookup_table;
115struct protection_domain **amd_iommu_pd_table;
116unsigned long *amd_iommu_pd_alloc_bitmap;
117
118static u32 dev_table_size;
119static u32 alias_table_size;
120static u32 rlookup_table_size;
121
122static void __init iommu_set_exclusion_range(struct amd_iommu *iommu)
123{
124 u64 start = iommu->exclusion_start & PAGE_MASK;
125 u64 limit = (start + iommu->exclusion_length) & PAGE_MASK;
126 u64 entry;
127
128 if (!iommu->exclusion_start)
129 return;
130
131 entry = start | MMIO_EXCL_ENABLE_MASK;
132 memcpy_toio(iommu->mmio_base + MMIO_EXCL_BASE_OFFSET,
133 &entry, sizeof(entry));
134
135 entry = limit;
136 memcpy_toio(iommu->mmio_base + MMIO_EXCL_LIMIT_OFFSET,
137 &entry, sizeof(entry));
138}
139
140static void __init iommu_set_device_table(struct amd_iommu *iommu)
141{
142 u32 entry;
143
144 BUG_ON(iommu->mmio_base == NULL);
145
146 entry = virt_to_phys(amd_iommu_dev_table);
147 entry |= (dev_table_size >> 12) - 1;
148 memcpy_toio(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET,
149 &entry, sizeof(entry));
150}
151
152static void __init iommu_feature_enable(struct amd_iommu *iommu, u8 bit)
153{
154 u32 ctrl;
155
156 ctrl = readl(iommu->mmio_base + MMIO_CONTROL_OFFSET);
157 ctrl |= (1 << bit);
158 writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET);
159}
160
161static void __init iommu_feature_disable(struct amd_iommu *iommu, u8 bit)
162{
163 u32 ctrl;
164
165 ctrl = (u64)readl(iommu->mmio_base + MMIO_CONTROL_OFFSET);
166 ctrl &= ~(1 << bit);
167 writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET);
168}
169
170void __init iommu_enable(struct amd_iommu *iommu)
171{
172 printk(KERN_INFO "AMD IOMMU: Enabling IOMMU at ");
173 print_devid(iommu->devid, 0);
174 printk(" cap 0x%hx\n", iommu->cap_ptr);
175
176 iommu_feature_enable(iommu, CONTROL_IOMMU_EN);
177}
178
179static u8 * __init iommu_map_mmio_space(u64 address)
180{
181 u8 *ret;
182
183 if (!request_mem_region(address, MMIO_REGION_LENGTH, "amd_iommu"))
184 return NULL;
185
186 ret = ioremap_nocache(address, MMIO_REGION_LENGTH);
187 if (ret != NULL)
188 return ret;
189
190 release_mem_region(address, MMIO_REGION_LENGTH);
191
192 return NULL;
193}
194
195static void __init iommu_unmap_mmio_space(struct amd_iommu *iommu)
196{
197 if (iommu->mmio_base)
198 iounmap(iommu->mmio_base);
199 release_mem_region(iommu->mmio_phys, MMIO_REGION_LENGTH);
200}
201
202static int __init find_last_devid_on_pci(int bus, int dev, int fn, int cap_ptr)
203{
204 u32 cap;
205
206 cap = read_pci_config(bus, dev, fn, cap_ptr+MMIO_RANGE_OFFSET);
207 UPDATE_LAST_BDF(DEVID(MMIO_GET_BUS(cap), MMIO_GET_LD(cap)));
208
209 return 0;
210}
211
212static int __init find_last_devid_from_ivhd(struct ivhd_header *h)
213{
214 u8 *p = (void *)h, *end = (void *)h;
215 struct ivhd_entry *dev;
216
217 p += sizeof(*h);
218 end += h->length;
219
220 find_last_devid_on_pci(PCI_BUS(h->devid),
221 PCI_SLOT(h->devid),
222 PCI_FUNC(h->devid),
223 h->cap_ptr);
224
225 while (p < end) {
226 dev = (struct ivhd_entry *)p;
227 switch (dev->type) {
228 case IVHD_DEV_SELECT:
229 case IVHD_DEV_RANGE_END:
230 case IVHD_DEV_ALIAS:
231 case IVHD_DEV_EXT_SELECT:
232 UPDATE_LAST_BDF(dev->devid);
233 break;
234 default:
235 break;
236 }
237 p += 0x04 << (*p >> 6);
238 }
239
240 WARN_ON(p != end);
241
242 return 0;
243}
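Device entries in an IVHD block are variable-length; the step p += 0x04 << (*p >> 6) decodes the entry size from the top two bits of the type byte. Worked examples from the constants defined above:

	/* 4 << (type >> 6) bytes per entry:
	 *   IVHD_DEV_SELECT (0x02): 0x02 >> 6 == 0  ->  4 bytes
	 *   IVHD_DEV_ALIAS  (0x42): 0x42 >> 6 == 1  ->  8 bytes
	 *   types 0x80-0xbf -> 16 bytes, 0xc0-0xff -> 32 bytes */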
244
245static int __init find_last_devid_acpi(struct acpi_table_header *table)
246{
247 int i;
248 u8 checksum = 0, *p = (u8 *)table, *end = (u8 *)table;
249 struct ivhd_header *h;
250
251 /*
252 * Validate checksum here so we don't need to do it when
253 * we actually parse the table
254 */
255 for (i = 0; i < table->length; ++i)
256 checksum += p[i];
257 if (checksum != 0)
258 /* ACPI table corrupt */
259 return -ENODEV;
260
261 p += IVRS_HEADER_LENGTH;
262
263 end += table->length;
264 while (p < end) {
265 h = (struct ivhd_header *)p;
266 switch (h->type) {
267 case ACPI_IVHD_TYPE:
268 find_last_devid_from_ivhd(h);
269 break;
270 default:
271 break;
272 }
273 p += h->length;
274 }
275 WARN_ON(p != end);
276
277 return 0;
278}
279
280static u8 * __init alloc_command_buffer(struct amd_iommu *iommu)
281{
282 u8 *cmd_buf = (u8 *)__get_free_pages(GFP_KERNEL,
283 get_order(CMD_BUFFER_SIZE));
284 u64 entry = 0;
285
286 if (cmd_buf == NULL)
287 return NULL;
288
289 iommu->cmd_buf_size = CMD_BUFFER_SIZE;
290
291 memset(cmd_buf, 0, CMD_BUFFER_SIZE);
292
293 entry = (u64)virt_to_phys(cmd_buf);
294 entry |= MMIO_CMD_SIZE_512;
295 memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET,
296 &entry, sizeof(entry));
297
298 iommu_feature_enable(iommu, CONTROL_CMDBUF_EN);
299
300 return cmd_buf;
301}
302
303static void __init free_command_buffer(struct amd_iommu *iommu)
304{
305 if (iommu->cmd_buf)
306 free_pages((unsigned long)iommu->cmd_buf,
307 get_order(CMD_BUFFER_SIZE));
308}
309
310static void set_dev_entry_bit(u16 devid, u8 bit)
311{
312 int i = (bit >> 5) & 0x07;
313 int _bit = bit & 0x1f;
314
315 amd_iommu_dev_table[devid].data[i] |= (1 << _bit);
316}
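set_dev_entry_bit() treats the device-table entry as an array of 32-bit words, splitting a flat bit number into a word index (bit >> 5) and a bit-in-word (bit & 0x1f). For example, for a flat bit number of 97:

	/* word: 97 >> 5 == 3, bit-in-word: 97 & 0x1f == 1 */
	amd_iommu_dev_table[devid].data[3] |= (1 << 1);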
317
318static void __init set_dev_entry_from_acpi(u16 devid, u32 flags, u32 ext_flags)
319{
320 if (flags & ACPI_DEVFLAG_INITPASS)
321 set_dev_entry_bit(devid, DEV_ENTRY_INIT_PASS);
322 if (flags & ACPI_DEVFLAG_EXTINT)
323 set_dev_entry_bit(devid, DEV_ENTRY_EINT_PASS);
324 if (flags & ACPI_DEVFLAG_NMI)
325 set_dev_entry_bit(devid, DEV_ENTRY_NMI_PASS);
326 if (flags & ACPI_DEVFLAG_SYSMGT1)
327 set_dev_entry_bit(devid, DEV_ENTRY_SYSMGT1);
328 if (flags & ACPI_DEVFLAG_SYSMGT2)
329 set_dev_entry_bit(devid, DEV_ENTRY_SYSMGT2);
330 if (flags & ACPI_DEVFLAG_LINT0)
331 set_dev_entry_bit(devid, DEV_ENTRY_LINT0_PASS);
332 if (flags & ACPI_DEVFLAG_LINT1)
333 set_dev_entry_bit(devid, DEV_ENTRY_LINT1_PASS);
334}
335
336static void __init set_iommu_for_device(struct amd_iommu *iommu, u16 devid)
337{
338 amd_iommu_rlookup_table[devid] = iommu;
339}
340
341static void __init set_device_exclusion_range(u16 devid, struct ivmd_header *m)
342{
343 struct amd_iommu *iommu = amd_iommu_rlookup_table[devid];
344
345 if (!(m->flags & IVMD_FLAG_EXCL_RANGE))
346 return;
347
348 if (iommu) {
349 set_dev_entry_bit(m->devid, DEV_ENTRY_EX);
350 iommu->exclusion_start = m->range_start;
351 iommu->exclusion_length = m->range_length;
352 }
353}
354
355static void __init init_iommu_from_pci(struct amd_iommu *iommu)
356{
357 int bus = PCI_BUS(iommu->devid);
358 int dev = PCI_SLOT(iommu->devid);
359 int fn = PCI_FUNC(iommu->devid);
360 int cap_ptr = iommu->cap_ptr;
361 u32 range;
362
363 iommu->cap = read_pci_config(bus, dev, fn, cap_ptr+MMIO_CAP_HDR_OFFSET);
364
365 range = read_pci_config(bus, dev, fn, cap_ptr+MMIO_RANGE_OFFSET);
366 iommu->first_device = DEVID(MMIO_GET_BUS(range), MMIO_GET_FD(range));
367 iommu->last_device = DEVID(MMIO_GET_BUS(range), MMIO_GET_LD(range));
368}
369
370static void __init init_iommu_from_acpi(struct amd_iommu *iommu,
371 struct ivhd_header *h)
372{
373 u8 *p = (u8 *)h;
374 u8 *end = p, flags = 0;
375 u16 dev_i, devid = 0, devid_start = 0, devid_to = 0;
376 u32 ext_flags = 0;
377 bool alias = 0;
378 struct ivhd_entry *e;
379
380 /*
381 * First set the recommended feature enable bits from ACPI
382 * into the IOMMU control registers
383 */
384 h->flags & IVHD_FLAG_HT_TUN_EN ?
385 iommu_feature_enable(iommu, CONTROL_HT_TUN_EN) :
386 iommu_feature_disable(iommu, CONTROL_HT_TUN_EN);
387
388 h->flags & IVHD_FLAG_PASSPW_EN ?
389 iommu_feature_enable(iommu, CONTROL_PASSPW_EN) :
390 iommu_feature_disable(iommu, CONTROL_PASSPW_EN);
391
392 h->flags & IVHD_FLAG_RESPASSPW_EN ?
393 iommu_feature_enable(iommu, CONTROL_RESPASSPW_EN) :
394 iommu_feature_disable(iommu, CONTROL_RESPASSPW_EN);
395
396 h->flags & IVHD_FLAG_ISOC_EN ?
397 iommu_feature_enable(iommu, CONTROL_ISOC_EN) :
398 iommu_feature_disable(iommu, CONTROL_ISOC_EN);
399
400 /*
401 * make IOMMU memory accesses cache coherent
402 */
403 iommu_feature_enable(iommu, CONTROL_COHERENT_EN);
404
405 /*
406 * Done. Now parse the device entries
407 */
408 p += sizeof(struct ivhd_header);
409 end += h->length;
410
411 while (p < end) {
412 e = (struct ivhd_entry *)p;
413 switch (e->type) {
414 case IVHD_DEV_ALL:
415 for (dev_i = iommu->first_device;
416 dev_i <= iommu->last_device; ++dev_i)
417 set_dev_entry_from_acpi(dev_i, e->flags, 0);
418 break;
419 case IVHD_DEV_SELECT:
420 devid = e->devid;
421 set_dev_entry_from_acpi(devid, e->flags, 0);
422 break;
423 case IVHD_DEV_SELECT_RANGE_START:
424 devid_start = e->devid;
425 flags = e->flags;
426 ext_flags = 0;
427 alias = 0;
428 break;
429 case IVHD_DEV_ALIAS:
430 devid = e->devid;
431 devid_to = e->ext >> 8;
432 set_dev_entry_from_acpi(devid, e->flags, 0);
433 amd_iommu_alias_table[devid] = devid_to;
434 break;
435 case IVHD_DEV_ALIAS_RANGE:
436 devid_start = e->devid;
437 flags = e->flags;
438 devid_to = e->ext >> 8;
439 ext_flags = 0;
440 alias = 1;
441 break;
442 case IVHD_DEV_EXT_SELECT:
443 devid = e->devid;
444 set_dev_entry_from_acpi(devid, e->flags, e->ext);
445 break;
446 case IVHD_DEV_EXT_SELECT_RANGE:
447 devid_start = e->devid;
448 flags = e->flags;
449 ext_flags = e->ext;
450 alias = 0;
451 break;
452 case IVHD_DEV_RANGE_END:
453 devid = e->devid;
454 for (dev_i = devid_start; dev_i <= devid; ++dev_i) {
455 if (alias)
456 amd_iommu_alias_table[dev_i] = devid_to;
457 set_dev_entry_from_acpi(
458 amd_iommu_alias_table[dev_i],
459 flags, ext_flags);
460 }
461 break;
462 default:
463 break;
464 }
465
466 p += 0x04 << (e->type >> 6);
467 }
468}
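
Note on the stepping expression at the bottom of the loop: IVHD device entries carry their own length in the two most significant bits of the type byte, so `p += 0x04 << (e->type >> 6)` advances by 4, 8, 16 or 32 bytes without needing a per-type size table. A minimal userspace sketch of the decode (the example type values are illustrative):

#include <stdio.h>

/* Sketch (not from the patch): derive an IVHD entry's length from
 * the top two bits of its type byte, as the parser above does. */
static unsigned int ivhd_entry_len(unsigned char type)
{
    return 0x04 << (type >> 6);
}

int main(void)
{
    /* example type values: 0x02 (a select entry), 0x42 (an alias entry) */
    printf("type 0x02 -> %u bytes\n", ivhd_entry_len(0x02)); /* 4  */
    printf("type 0x42 -> %u bytes\n", ivhd_entry_len(0x42)); /* 8  */
    printf("type 0xc0 -> %u bytes\n", ivhd_entry_len(0xc0)); /* 32 */
    return 0;
}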
469
470static int __init init_iommu_devices(struct amd_iommu *iommu)
471{
472 u16 i;
473
474 for (i = iommu->first_device; i <= iommu->last_device; ++i)
475 set_iommu_for_device(iommu, i);
476
477 return 0;
478}
479
480static void __init free_iommu_one(struct amd_iommu *iommu)
481{
482 free_command_buffer(iommu);
483 iommu_unmap_mmio_space(iommu);
484}
485
486static void __init free_iommu_all(void)
487{
488 struct amd_iommu *iommu, *next;
489
490 list_for_each_entry_safe(iommu, next, &amd_iommu_list, list) {
491 list_del(&iommu->list);
492 free_iommu_one(iommu);
493 kfree(iommu);
494 }
495}
496
497static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h)
498{
499 spin_lock_init(&iommu->lock);
500 list_add_tail(&iommu->list, &amd_iommu_list);
501
502 /*
503 * Copy data from ACPI table entry to the iommu struct
504 */
505 iommu->devid = h->devid;
506 iommu->cap_ptr = h->cap_ptr;
507 iommu->mmio_phys = h->mmio_phys;
508 iommu->mmio_base = iommu_map_mmio_space(h->mmio_phys);
509 if (!iommu->mmio_base)
510 return -ENOMEM;
511
512 iommu_set_device_table(iommu);
513 iommu->cmd_buf = alloc_command_buffer(iommu);
514 if (!iommu->cmd_buf)
515 return -ENOMEM;
516
517 init_iommu_from_pci(iommu);
518 init_iommu_from_acpi(iommu, h);
519 init_iommu_devices(iommu);
520
521 return 0;
522}
523
524static int __init init_iommu_all(struct acpi_table_header *table)
525{
526 u8 *p = (u8 *)table, *end = (u8 *)table;
527 struct ivhd_header *h;
528 struct amd_iommu *iommu;
529 int ret;
530
531 INIT_LIST_HEAD(&amd_iommu_list);
532
533 end += table->length;
534 p += IVRS_HEADER_LENGTH;
535
536 while (p < end) {
537 h = (struct ivhd_header *)p;
538 switch (*p) {
539 case ACPI_IVHD_TYPE:
540 iommu = kzalloc(sizeof(struct amd_iommu), GFP_KERNEL);
541 if (iommu == NULL)
542 return -ENOMEM;
543 ret = init_iommu_one(iommu, h);
544 if (ret)
545 return ret;
546 break;
547 default:
548 break;
549 }
550 p += h->length;
551
552 }
553 WARN_ON(p != end);
554
555 return 0;
556}
557
558static void __init free_unity_maps(void)
559{
560 struct unity_map_entry *entry, *next;
561
562 list_for_each_entry_safe(entry, next, &amd_iommu_unity_map, list) {
563 list_del(&entry->list);
564 kfree(entry);
565 }
566}
567
568static int __init init_exclusion_range(struct ivmd_header *m)
569{
570 int i;
571
572 switch (m->type) {
573 case ACPI_IVMD_TYPE:
574 set_device_exclusion_range(m->devid, m);
575 break;
576 case ACPI_IVMD_TYPE_ALL:
577 for (i = 0; i < amd_iommu_last_bdf; ++i)
578 set_device_exclusion_range(i, m);
579 break;
580 case ACPI_IVMD_TYPE_RANGE:
581 for (i = m->devid; i <= m->aux; ++i)
582 set_device_exclusion_range(i, m);
583 break;
584 default:
585 break;
586 }
587
588 return 0;
589}
590
591static int __init init_unity_map_range(struct ivmd_header *m)
592{
593 struct unity_map_entry *e = 0;
594
595 e = kzalloc(sizeof(*e), GFP_KERNEL);
596 if (e == NULL)
597 return -ENOMEM;
598
599 switch (m->type) {
600 default:
601 case ACPI_IVMD_TYPE:
602 e->devid_start = e->devid_end = m->devid;
603 break;
604 case ACPI_IVMD_TYPE_ALL:
605 e->devid_start = 0;
606 e->devid_end = amd_iommu_last_bdf;
607 break;
608 case ACPI_IVMD_TYPE_RANGE:
609 e->devid_start = m->devid;
610 e->devid_end = m->aux;
611 break;
612 }
613 e->address_start = PAGE_ALIGN(m->range_start);
614 e->address_end = e->address_start + PAGE_ALIGN(m->range_length);
615 e->prot = m->flags >> 1;
616
617 list_add_tail(&e->list, &amd_iommu_unity_map);
618
619 return 0;
620}
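
Two details of the arithmetic above are easy to miss: PAGE_ALIGN() rounds up, so a unity range that starts or ends mid-page is widened to whole pages, and `m->flags >> 1` shifts the IVMD flags down one bit so that (assuming the read/write permission bits sit at bits 1 and 2) they land at the bottom of e->prot. A sketch of the rounding, assuming 4 KiB pages:

#include <stdio.h>

#define PAGE_SIZE 4096UL
#define PAGE_ALIGN(x) (((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

int main(void)
{
    unsigned long range_start = 0x100c00;   /* starts mid-page */
    unsigned long range_length = 0x1800;    /* 6 KiB */

    unsigned long start = PAGE_ALIGN(range_start);
    unsigned long end = start + PAGE_ALIGN(range_length);

    /* start = 0x101000, end = 0x103000: widened to two whole pages */
    printf("unity map: [%#lx, %#lx)\n", start, end);
    return 0;
}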
621
622static int __init init_memory_definitions(struct acpi_table_header *table)
623{
624 u8 *p = (u8 *)table, *end = (u8 *)table;
625 struct ivmd_header *m;
626
627 INIT_LIST_HEAD(&amd_iommu_unity_map);
628
629 end += table->length;
630 p += IVRS_HEADER_LENGTH;
631
632 while (p < end) {
633 m = (struct ivmd_header *)p;
634 if (m->flags & IVMD_FLAG_EXCL_RANGE)
635 init_exclusion_range(m);
636 else if (m->flags & IVMD_FLAG_UNITY_MAP)
637 init_unity_map_range(m);
638
639 p += m->length;
640 }
641
642 return 0;
643}
644
645static void __init enable_iommus(void)
646{
647 struct amd_iommu *iommu;
648
649 list_for_each_entry(iommu, &amd_iommu_list, list) {
650 iommu_set_exclusion_range(iommu);
651 iommu_enable(iommu);
652 }
653}
654
655/*
656 * Suspend/Resume support
 657 * disable suspend until real resume is implemented
658 */
659
660static int amd_iommu_resume(struct sys_device *dev)
661{
662 return 0;
663}
664
665static int amd_iommu_suspend(struct sys_device *dev, pm_message_t state)
666{
667 return -EINVAL;
668}
669
670static struct sysdev_class amd_iommu_sysdev_class = {
671 .name = "amd_iommu",
672 .suspend = amd_iommu_suspend,
673 .resume = amd_iommu_resume,
674};
675
676static struct sys_device device_amd_iommu = {
677 .id = 0,
678 .cls = &amd_iommu_sysdev_class,
679};
680
681int __init amd_iommu_init(void)
682{
683 int i, ret = 0;
684
685
686 if (no_iommu) {
687 printk(KERN_INFO "AMD IOMMU disabled by kernel command line\n");
688 return 0;
689 }
690
691 if (!amd_iommu_detected)
692 return -ENODEV;
693
694 /*
695 * First parse ACPI tables to find the largest Bus/Dev/Func
 696 * we need to handle. Based on this information, the shared data
 697 * structures for the IOMMUs in the system are allocated.
698 */
699 if (acpi_table_parse("IVRS", find_last_devid_acpi) != 0)
700 return -ENODEV;
701
702 dev_table_size = TBL_SIZE(DEV_TABLE_ENTRY_SIZE);
703 alias_table_size = TBL_SIZE(ALIAS_TABLE_ENTRY_SIZE);
704 rlookup_table_size = TBL_SIZE(RLOOKUP_TABLE_ENTRY_SIZE);
705
706 ret = -ENOMEM;
707
708 /* Device table - directly used by all IOMMUs */
709 amd_iommu_dev_table = (void *)__get_free_pages(GFP_KERNEL,
710 get_order(dev_table_size));
711 if (amd_iommu_dev_table == NULL)
712 goto out;
713
714 /*
715 * Alias table - map PCI Bus/Dev/Func to Bus/Dev/Func the
 716 * IOMMU sees for that device
717 */
718 amd_iommu_alias_table = (void *)__get_free_pages(GFP_KERNEL,
719 get_order(alias_table_size));
720 if (amd_iommu_alias_table == NULL)
721 goto free;
722
723 /* IOMMU rlookup table - find the IOMMU for a specific device */
724 amd_iommu_rlookup_table = (void *)__get_free_pages(GFP_KERNEL,
725 get_order(rlookup_table_size));
726 if (amd_iommu_rlookup_table == NULL)
727 goto free;
728
729 /*
730 * Protection Domain table - maps devices to protection domains
731 * This table has the same size as the rlookup_table
732 */
733 amd_iommu_pd_table = (void *)__get_free_pages(GFP_KERNEL,
734 get_order(rlookup_table_size));
735 if (amd_iommu_pd_table == NULL)
736 goto free;
737
738 amd_iommu_pd_alloc_bitmap = (void *)__get_free_pages(GFP_KERNEL,
739 get_order(MAX_DOMAIN_ID/8));
740 if (amd_iommu_pd_alloc_bitmap == NULL)
741 goto free;
742
743 /*
744 * memory is allocated now; initialize the device table with all zeroes
 745 * and let all alias entries point to themselves
746 */
747 memset(amd_iommu_dev_table, 0, dev_table_size);
748 for (i = 0; i < amd_iommu_last_bdf; ++i)
749 amd_iommu_alias_table[i] = i;
750
751 memset(amd_iommu_pd_table, 0, rlookup_table_size);
752 memset(amd_iommu_pd_alloc_bitmap, 0, MAX_DOMAIN_ID / 8);
753
754 /*
 755 * never allocate domain 0 because it's used as the non-allocated and
756 * error value placeholder
757 */
758 amd_iommu_pd_alloc_bitmap[0] = 1;
759
760 /*
 761 * now that the data structures are allocated and basically
 762 * initialized, start the real ACPI table scan
763 */
764 ret = -ENODEV;
765 if (acpi_table_parse("IVRS", init_iommu_all) != 0)
766 goto free;
767
768 if (acpi_table_parse("IVRS", init_memory_definitions) != 0)
769 goto free;
770
771 ret = amd_iommu_init_dma_ops();
772 if (ret)
773 goto free;
774
775 ret = sysdev_class_register(&amd_iommu_sysdev_class);
776 if (ret)
777 goto free;
778
779 ret = sysdev_register(&device_amd_iommu);
780 if (ret)
781 goto free;
782
783 enable_iommus();
784
785 printk(KERN_INFO "AMD IOMMU: aperture size is %d MB\n",
786 (1 << (amd_iommu_aperture_order-20)));
787
788 printk(KERN_INFO "AMD IOMMU: device isolation ");
789 if (amd_iommu_isolate)
790 printk("enabled\n");
791 else
792 printk("disabled\n");
793
794out:
795 return ret;
796
797free:
798 if (amd_iommu_pd_alloc_bitmap)
799 free_pages((unsigned long)amd_iommu_pd_alloc_bitmap, 1);
800
801 if (amd_iommu_pd_table)
802 free_pages((unsigned long)amd_iommu_pd_table,
803 get_order(rlookup_table_size));
804
805 if (amd_iommu_rlookup_table)
806 free_pages((unsigned long)amd_iommu_rlookup_table,
807 get_order(rlookup_table_size));
808
809 if (amd_iommu_alias_table)
810 free_pages((unsigned long)amd_iommu_alias_table,
811 get_order(alias_table_size));
812
813 if (amd_iommu_dev_table)
814 free_pages((unsigned long)amd_iommu_dev_table,
815 get_order(dev_table_size));
816
817 free_iommu_all();
818
819 free_unity_maps();
820
821 goto out;
822}
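
All five tables above use the same allocation idiom: __get_free_pages(GFP_KERNEL, get_order(size)) returns physically contiguous, page-aligned memory, where get_order() picks the smallest order such that PAGE_SIZE << order >= size. A userspace sketch of that rounding, assuming 4 KiB pages (the 512 KiB table size is only an illustration):

#include <stdio.h>

#define PAGE_SHIFT 12

/* Sketch of get_order() semantics: smallest n with (4096 << n) >= size. */
static int get_order(unsigned long size)
{
    int order = 0;

    size = (size - 1) >> PAGE_SHIFT;
    while (size) {
        order++;
        size >>= 1;
    }
    return order;
}

int main(void)
{
    /* e.g. a hypothetical 512 KiB table needs order 7 = 128 pages */
    printf("get_order(512K) = %d\n", get_order(512 * 1024)); /* 7 */
    printf("get_order(4097) = %d\n", get_order(4097));       /* 1 */
    return 0;
}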
823
824static int __init early_amd_iommu_detect(struct acpi_table_header *table)
825{
826 return 0;
827}
828
829void __init amd_iommu_detect(void)
830{
831 if (swiotlb || no_iommu || iommu_detected)
832 return;
833
834 if (acpi_table_parse("IVRS", early_amd_iommu_detect) == 0) {
835 iommu_detected = 1;
836 amd_iommu_detected = 1;
837#ifdef CONFIG_GART_IOMMU
838 gart_iommu_aperture_disabled = 1;
839 gart_iommu_aperture = 0;
840#endif
841 }
842}
843
844static int __init parse_amd_iommu_options(char *str)
845{
846 for (; *str; ++str) {
847 if (strcmp(str, "isolate") == 0)
848 amd_iommu_isolate = 1;
849 }
850
851 return 1;
852}
853
854static int __init parse_amd_iommu_size_options(char *str)
855{
856 for (; *str; ++str) {
857 if (strcmp(str, "32M") == 0)
858 amd_iommu_aperture_order = 25;
859 if (strcmp(str, "64M") == 0)
860 amd_iommu_aperture_order = 26;
861 if (strcmp(str, "128M") == 0)
862 amd_iommu_aperture_order = 27;
863 if (strcmp(str, "256M") == 0)
864 amd_iommu_aperture_order = 28;
865 if (strcmp(str, "512M") == 0)
866 amd_iommu_aperture_order = 29;
867 if (strcmp(str, "1G") == 0)
868 amd_iommu_aperture_order = 30;
869 }
870
871 return 1;
872}
873
874__setup("amd_iommu=", parse_amd_iommu_options);
875__setup("amd_iommu_size=", parse_amd_iommu_size_options);
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index 479926d9e004..9f907806c1a5 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -35,6 +35,18 @@ int fallback_aper_force __initdata;
35 35
36int fix_aperture __initdata = 1; 36int fix_aperture __initdata = 1;
37 37
38struct bus_dev_range {
39 int bus;
40 int dev_base;
41 int dev_limit;
42};
43
44static struct bus_dev_range bus_dev_ranges[] __initdata = {
45 { 0x00, 0x18, 0x20},
46 { 0xff, 0x00, 0x20},
47 { 0xfe, 0x00, 0x20}
48};
49
38static struct resource gart_resource = { 50static struct resource gart_resource = {
39 .name = "GART", 51 .name = "GART",
40 .flags = IORESOURCE_MEM, 52 .flags = IORESOURCE_MEM,
@@ -55,8 +67,9 @@ static u32 __init allocate_aperture(void)
55 u32 aper_size; 67 u32 aper_size;
56 void *p; 68 void *p;
57 69
 58 if (fallback_aper_order > 7) 70 /* aper_size should be <= 1G */
59 fallback_aper_order = 7; 71 if (fallback_aper_order > 5)
72 fallback_aper_order = 5;
60 aper_size = (32 * 1024 * 1024) << fallback_aper_order; 73 aper_size = (32 * 1024 * 1024) << fallback_aper_order;
61 74
62 /* 75 /*
@@ -65,7 +78,20 @@ static u32 __init allocate_aperture(void)
65 * memory. Unfortunately we cannot move it up because that would 78 * memory. Unfortunately we cannot move it up because that would
66 * make the IOMMU useless. 79 * make the IOMMU useless.
67 */ 80 */
68 p = __alloc_bootmem_nopanic(aper_size, aper_size, 0); 81 /*
 82 * Use 512M as the goal: kexec may load a big kernel image
 83 * that decompresses in place, and that image could overlap
 84 * the gart region that is still in use.
 85 * sequence:
 86 * kernel_small
 87 * ==> kexec (via kdump trigger path, or previous kernel didn't shut down gart)
 88 * ==> kernel_small (gart area becomes e820_reserved)
 89 * ==> kexec (via kdump trigger path, or previous kernel didn't shut down gart)
 90 * ==> kernel_big (uncompressed size will be bigger than 64M or 128M)
 91 * so don't place the gart below 512M; leave that space for the
 92 * kernel code, to be safe
 93 */
94 p = __alloc_bootmem_nopanic(aper_size, aper_size, 512ULL<<20);
69 if (!p || __pa(p)+aper_size > 0xffffffff) { 95 if (!p || __pa(p)+aper_size > 0xffffffff) {
70 printk(KERN_ERR 96 printk(KERN_ERR
71 "Cannot allocate aperture memory hole (%p,%uK)\n", 97 "Cannot allocate aperture memory hole (%p,%uK)\n",
@@ -83,69 +109,53 @@ static u32 __init allocate_aperture(void)
83 return (u32)__pa(p); 109 return (u32)__pa(p);
84} 110}
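
The clamp at the top of allocate_aperture() follows from the GART size encoding, aper_size = 32 MB << order: order 7 would permit a 4 GB aperture, while order 5 enforces the 1 GB cap the new comment asks for. A quick check:

#include <stdio.h>

int main(void)
{
    unsigned int order;

    /* GART aperture size is (32 MB << order) */
    for (order = 0; order <= 7; order++)
        printf("order %u -> %llu MB\n", order,
               (32ULL * 1024 * 1024 << order) >> 20);
    /* order 5 -> 1024 MB (the new cap); order 7 -> 4096 MB (the old one) */
    return 0;
}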
85 111
86static int __init aperture_valid(u64 aper_base, u32 aper_size)
87{
88 if (!aper_base)
89 return 0;
90
91 if (aper_base + aper_size > 0x100000000UL) {
92 printk(KERN_ERR "Aperture beyond 4GB. Ignoring.\n");
93 return 0;
94 }
95 if (e820_any_mapped(aper_base, aper_base + aper_size, E820_RAM)) {
96 printk(KERN_ERR "Aperture pointing to e820 RAM. Ignoring.\n");
97 return 0;
98 }
99 if (aper_size < 64*1024*1024) {
100 printk(KERN_ERR "Aperture too small (%d MB)\n", aper_size>>20);
101 return 0;
102 }
103
104 return 1;
105}
106 112
107/* Find a PCI capability */ 113/* Find a PCI capability */
108static __u32 __init find_cap(int num, int slot, int func, int cap) 114static u32 __init find_cap(int bus, int slot, int func, int cap)
109{ 115{
110 int bytes; 116 int bytes;
111 u8 pos; 117 u8 pos;
112 118
113 if (!(read_pci_config_16(num, slot, func, PCI_STATUS) & 119 if (!(read_pci_config_16(bus, slot, func, PCI_STATUS) &
114 PCI_STATUS_CAP_LIST)) 120 PCI_STATUS_CAP_LIST))
115 return 0; 121 return 0;
116 122
117 pos = read_pci_config_byte(num, slot, func, PCI_CAPABILITY_LIST); 123 pos = read_pci_config_byte(bus, slot, func, PCI_CAPABILITY_LIST);
118 for (bytes = 0; bytes < 48 && pos >= 0x40; bytes++) { 124 for (bytes = 0; bytes < 48 && pos >= 0x40; bytes++) {
119 u8 id; 125 u8 id;
120 126
121 pos &= ~3; 127 pos &= ~3;
122 id = read_pci_config_byte(num, slot, func, pos+PCI_CAP_LIST_ID); 128 id = read_pci_config_byte(bus, slot, func, pos+PCI_CAP_LIST_ID);
123 if (id == 0xff) 129 if (id == 0xff)
124 break; 130 break;
125 if (id == cap) 131 if (id == cap)
126 return pos; 132 return pos;
127 pos = read_pci_config_byte(num, slot, func, 133 pos = read_pci_config_byte(bus, slot, func,
128 pos+PCI_CAP_LIST_NEXT); 134 pos+PCI_CAP_LIST_NEXT);
129 } 135 }
130 return 0; 136 return 0;
131} 137}
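
find_cap() is the standard PCI capability-list walk: confirm the capability list via the status register, read the list head at offset 0x34 (PCI_CAPABILITY_LIST), mask the low two bits of each pointer, and bound the loop at 48 steps so a corrupt list cannot spin forever. A self-contained sketch against a stubbed config space (sample contents invented; the status-register check is omitted):

#include <stdio.h>

/* Stubbed 256-byte PCI config space; contents below are made up. */
static unsigned char cfg[256];

static unsigned char find_cap(unsigned char want)
{
    unsigned char pos = cfg[0x34];      /* PCI_CAPABILITY_LIST */
    int bytes;

    for (bytes = 0; bytes < 48 && pos >= 0x40; bytes++) {
        pos &= ~3;                      /* pointers are dword-aligned */
        if (cfg[pos] == 0xff)
            break;                      /* device gone / bad read */
        if (cfg[pos] == want)
            return pos;
        pos = cfg[pos + 1];             /* PCI_CAP_LIST_NEXT */
    }
    return 0;
}

int main(void)
{
    cfg[0x34] = 0x50;                       /* head of the list */
    cfg[0x50] = 0x01; cfg[0x51] = 0x60;     /* PM cap, next at 0x60 */
    cfg[0x60] = 0x02; cfg[0x61] = 0x00;     /* AGP cap, end of list */

    printf("AGP cap at %#x\n", find_cap(0x02));  /* 0x60 */
    return 0;
}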
132 138
133/* Read a standard AGPv3 bridge header */ 139/* Read a standard AGPv3 bridge header */
134static __u32 __init read_agp(int num, int slot, int func, int cap, u32 *order) 140static u32 __init read_agp(int bus, int slot, int func, int cap, u32 *order)
135{ 141{
136 u32 apsize; 142 u32 apsize;
137 u32 apsizereg; 143 u32 apsizereg;
138 int nbits; 144 int nbits;
139 u32 aper_low, aper_hi; 145 u32 aper_low, aper_hi;
140 u64 aper; 146 u64 aper;
147 u32 old_order;
141 148
142 printk(KERN_INFO "AGP bridge at %02x:%02x:%02x\n", num, slot, func); 149 printk(KERN_INFO "AGP bridge at %02x:%02x:%02x\n", bus, slot, func);
143 apsizereg = read_pci_config_16(num, slot, func, cap + 0x14); 150 apsizereg = read_pci_config_16(bus, slot, func, cap + 0x14);
144 if (apsizereg == 0xffffffff) { 151 if (apsizereg == 0xffffffff) {
145 printk(KERN_ERR "APSIZE in AGP bridge unreadable\n"); 152 printk(KERN_ERR "APSIZE in AGP bridge unreadable\n");
146 return 0; 153 return 0;
147 } 154 }
148 155
156 /* old_order could be the value from NB gart setting */
157 old_order = *order;
158
149 apsize = apsizereg & 0xfff; 159 apsize = apsizereg & 0xfff;
150 /* Some BIOS use weird encodings not in the AGPv3 table. */ 160 /* Some BIOS use weird encodings not in the AGPv3 table. */
151 if (apsize & 0xff) 161 if (apsize & 0xff)
@@ -155,14 +165,26 @@ static __u32 __init read_agp(int num, int slot, int func, int cap, u32 *order)
155 if ((int)*order < 0) /* < 32MB */ 165 if ((int)*order < 0) /* < 32MB */
156 *order = 0; 166 *order = 0;
157 167
158 aper_low = read_pci_config(num, slot, func, 0x10); 168 aper_low = read_pci_config(bus, slot, func, 0x10);
159 aper_hi = read_pci_config(num, slot, func, 0x14); 169 aper_hi = read_pci_config(bus, slot, func, 0x14);
160 aper = (aper_low & ~((1<<22)-1)) | ((u64)aper_hi << 32); 170 aper = (aper_low & ~((1<<22)-1)) | ((u64)aper_hi << 32);
161 171
172 /*
173 * On some sick chips, APSIZE is 0. It means it wants 4G
 174 * so let's double-check that order, and trust the AMD NB settings:
175 */
176 printk(KERN_INFO "Aperture from AGP @ %Lx old size %u MB\n",
177 aper, 32 << old_order);
178 if (aper + (32ULL<<(20 + *order)) > 0x100000000ULL) {
179 printk(KERN_INFO "Aperture size %u MB (APSIZE %x) is not right, using settings from NB\n",
180 32 << *order, apsizereg);
181 *order = old_order;
182 }
183
162 printk(KERN_INFO "Aperture from AGP @ %Lx size %u MB (APSIZE %x)\n", 184 printk(KERN_INFO "Aperture from AGP @ %Lx size %u MB (APSIZE %x)\n",
163 aper, 32 << *order, apsizereg); 185 aper, 32 << *order, apsizereg);
164 186
165 if (!aperture_valid(aper, (32*1024*1024) << *order)) 187 if (!aperture_valid(aper, (32*1024*1024) << *order, 32<<20))
166 return 0; 188 return 0;
167 return (u32)aper; 189 return (u32)aper;
168} 190}
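
One line of read_agp() deserves unpacking: the aperture base is rebuilt from a 64-bit BAR pair by masking off the low 22 bits of the low dword (flag bits plus any sub-4MB offset, the aperture being at least 4 MB aligned) and splicing the high dword in at bit 32. A sketch with an invented register pair:

#include <stdio.h>

int main(void)
{
    unsigned int aper_low = 0xd0000008;     /* invented BAR contents */
    unsigned int aper_hi = 0x00000001;
    unsigned long long aper;

    /* mask low 22 bits, splice the high dword in at bit 32 */
    aper = (aper_low & ~((1U << 22) - 1)) |
           ((unsigned long long)aper_hi << 32);

    printf("aperture base = %#llx\n", aper);    /* 0x1d0000000 */
    return 0;
}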
@@ -180,17 +202,17 @@ static __u32 __init read_agp(int num, int slot, int func, int cap, u32 *order)
180 * the AGP bridges should be always an own bus on the HT hierarchy, 202 * the AGP bridges should be always an own bus on the HT hierarchy,
181 * but do it here for future safety. 203 * but do it here for future safety.
182 */ 204 */
183static __u32 __init search_agp_bridge(u32 *order, int *valid_agp) 205static u32 __init search_agp_bridge(u32 *order, int *valid_agp)
184{ 206{
185 int num, slot, func; 207 int bus, slot, func;
186 208
187 /* Poor man's PCI discovery */ 209 /* Poor man's PCI discovery */
188 for (num = 0; num < 256; num++) { 210 for (bus = 0; bus < 256; bus++) {
189 for (slot = 0; slot < 32; slot++) { 211 for (slot = 0; slot < 32; slot++) {
190 for (func = 0; func < 8; func++) { 212 for (func = 0; func < 8; func++) {
191 u32 class, cap; 213 u32 class, cap;
192 u8 type; 214 u8 type;
193 class = read_pci_config(num, slot, func, 215 class = read_pci_config(bus, slot, func,
194 PCI_CLASS_REVISION); 216 PCI_CLASS_REVISION);
195 if (class == 0xffffffff) 217 if (class == 0xffffffff)
196 break; 218 break;
@@ -199,17 +221,17 @@ static __u32 __init search_agp_bridge(u32 *order, int *valid_agp)
199 case PCI_CLASS_BRIDGE_HOST: 221 case PCI_CLASS_BRIDGE_HOST:
200 case PCI_CLASS_BRIDGE_OTHER: /* needed? */ 222 case PCI_CLASS_BRIDGE_OTHER: /* needed? */
201 /* AGP bridge? */ 223 /* AGP bridge? */
202 cap = find_cap(num, slot, func, 224 cap = find_cap(bus, slot, func,
203 PCI_CAP_ID_AGP); 225 PCI_CAP_ID_AGP);
204 if (!cap) 226 if (!cap)
205 break; 227 break;
206 *valid_agp = 1; 228 *valid_agp = 1;
207 return read_agp(num, slot, func, cap, 229 return read_agp(bus, slot, func, cap,
208 order); 230 order);
209 } 231 }
210 232
211 /* No multi-function device? */ 233 /* No multi-function device? */
212 type = read_pci_config_byte(num, slot, func, 234 type = read_pci_config_byte(bus, slot, func,
213 PCI_HEADER_TYPE); 235 PCI_HEADER_TYPE);
214 if (!(type & 0x80)) 236 if (!(type & 0x80))
215 break; 237 break;
@@ -249,36 +271,50 @@ void __init early_gart_iommu_check(void)
249 * or BIOS forget to put that in reserved. 271 * or BIOS forget to put that in reserved.
250 * try to update e820 to make that region as reserved. 272 * try to update e820 to make that region as reserved.
251 */ 273 */
252 int fix, num; 274 int i, fix, slot;
253 u32 ctl; 275 u32 ctl;
254 u32 aper_size = 0, aper_order = 0, last_aper_order = 0; 276 u32 aper_size = 0, aper_order = 0, last_aper_order = 0;
255 u64 aper_base = 0, last_aper_base = 0; 277 u64 aper_base = 0, last_aper_base = 0;
256 int aper_enabled = 0, last_aper_enabled = 0; 278 int aper_enabled = 0, last_aper_enabled = 0, last_valid = 0;
257 279
258 if (!early_pci_allowed()) 280 if (!early_pci_allowed())
259 return; 281 return;
260 282
 283 /* This is mostly a duplicate of iommu_hole_init */
261 fix = 0; 284 fix = 0;
262 for (num = 24; num < 32; num++) { 285 for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) {
263 if (!early_is_k8_nb(read_pci_config(0, num, 3, 0x00))) 286 int bus;
264 continue; 287 int dev_base, dev_limit;
265 288
266 ctl = read_pci_config(0, num, 3, 0x90); 289 bus = bus_dev_ranges[i].bus;
267 aper_enabled = ctl & 1; 290 dev_base = bus_dev_ranges[i].dev_base;
268 aper_order = (ctl >> 1) & 7; 291 dev_limit = bus_dev_ranges[i].dev_limit;
269 aper_size = (32 * 1024 * 1024) << aper_order; 292
270 aper_base = read_pci_config(0, num, 3, 0x94) & 0x7fff; 293 for (slot = dev_base; slot < dev_limit; slot++) {
271 aper_base <<= 25; 294 if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00)))
272 295 continue;
273 if ((last_aper_order && aper_order != last_aper_order) || 296
274 (last_aper_base && aper_base != last_aper_base) || 297 ctl = read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL);
275 (last_aper_enabled && aper_enabled != last_aper_enabled)) { 298 aper_enabled = ctl & AMD64_GARTEN;
276 fix = 1; 299 aper_order = (ctl >> 1) & 7;
277 break; 300 aper_size = (32 * 1024 * 1024) << aper_order;
301 aper_base = read_pci_config(bus, slot, 3, AMD64_GARTAPERTUREBASE) & 0x7fff;
302 aper_base <<= 25;
303
304 if (last_valid) {
305 if ((aper_order != last_aper_order) ||
306 (aper_base != last_aper_base) ||
307 (aper_enabled != last_aper_enabled)) {
308 fix = 1;
309 break;
310 }
311 }
312
313 last_aper_order = aper_order;
314 last_aper_base = aper_base;
315 last_aper_enabled = aper_enabled;
316 last_valid = 1;
278 } 317 }
279 last_aper_order = aper_order;
280 last_aper_base = aper_base;
281 last_aper_enabled = aper_enabled;
282 } 318 }
283 319
284 if (!fix && !aper_enabled) 320 if (!fix && !aper_enabled)
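
The loop above decodes the K8 northbridge GART registers on every node the same way: in AMD64_GARTAPERTURECTL, bit 0 is the enable (AMD64_GARTEN) and bits 3:1 the size order, while AMD64_GARTAPERTUREBASE holds the base in 32 MB units, hence the & 0x7fff and << 25. A decode sketch with invented register values:

#include <stdio.h>

int main(void)
{
    unsigned int ctl = 0x0b;        /* invented: enabled, order 5 */
    unsigned int base_reg = 0x0020; /* invented: 0x20 * 32 MB = 1 GB */

    int enabled = ctl & 1;
    unsigned int order = (ctl >> 1) & 7;
    unsigned int aper_size = (32 * 1024 * 1024) << order;
    unsigned long long aper_base =
        (unsigned long long)(base_reg & 0x7fff) << 25;

    /* prints: enabled=1 size=1024 MB base=0x40000000 */
    printf("enabled=%d size=%u MB base=%#llx\n",
           enabled, aper_size >> 20, aper_base);
    return 0;
}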
@@ -290,32 +326,46 @@ void __init early_gart_iommu_check(void)
290 if (gart_fix_e820 && !fix && aper_enabled) { 326 if (gart_fix_e820 && !fix && aper_enabled) {
291 if (e820_any_mapped(aper_base, aper_base + aper_size, 327 if (e820_any_mapped(aper_base, aper_base + aper_size,
292 E820_RAM)) { 328 E820_RAM)) {
293 /* reserved it, so we can resuse it in second kernel */ 329 /* reserve it, so we can reuse it in second kernel */
294 printk(KERN_INFO "update e820 for GART\n"); 330 printk(KERN_INFO "update e820 for GART\n");
295 add_memory_region(aper_base, aper_size, E820_RESERVED); 331 e820_add_region(aper_base, aper_size, E820_RESERVED);
296 update_e820(); 332 update_e820();
297 } 333 }
298 return;
299 } 334 }
300 335
336 if (!fix)
337 return;
338
 301 /* different nodes have different settings, disable them all first */ 339 /* different nodes have different settings, disable them all first */
302 for (num = 24; num < 32; num++) { 340 for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) {
303 if (!early_is_k8_nb(read_pci_config(0, num, 3, 0x00))) 341 int bus;
304 continue; 342 int dev_base, dev_limit;
343
344 bus = bus_dev_ranges[i].bus;
345 dev_base = bus_dev_ranges[i].dev_base;
346 dev_limit = bus_dev_ranges[i].dev_limit;
347
348 for (slot = dev_base; slot < dev_limit; slot++) {
349 if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00)))
350 continue;
305 351
306 ctl = read_pci_config(0, num, 3, 0x90); 352 ctl = read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL);
307 ctl &= ~1; 353 ctl &= ~AMD64_GARTEN;
308 write_pci_config(0, num, 3, 0x90, ctl); 354 write_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL, ctl);
355 }
309 } 356 }
310 357
311} 358}
312 359
360static int __initdata printed_gart_size_msg;
361
313void __init gart_iommu_hole_init(void) 362void __init gart_iommu_hole_init(void)
314{ 363{
364 u32 agp_aper_base = 0, agp_aper_order = 0;
315 u32 aper_size, aper_alloc = 0, aper_order = 0, last_aper_order = 0; 365 u32 aper_size, aper_alloc = 0, aper_order = 0, last_aper_order = 0;
316 u64 aper_base, last_aper_base = 0; 366 u64 aper_base, last_aper_base = 0;
317 int fix, num, valid_agp = 0; 367 int fix, slot, valid_agp = 0;
318 int node; 368 int i, node;
319 369
320 if (gart_iommu_aperture_disabled || !fix_aperture || 370 if (gart_iommu_aperture_disabled || !fix_aperture ||
321 !early_pci_allowed()) 371 !early_pci_allowed())
@@ -323,38 +373,65 @@ void __init gart_iommu_hole_init(void)
323 373
324 printk(KERN_INFO "Checking aperture...\n"); 374 printk(KERN_INFO "Checking aperture...\n");
325 375
376 if (!fallback_aper_force)
377 agp_aper_base = search_agp_bridge(&agp_aper_order, &valid_agp);
378
326 fix = 0; 379 fix = 0;
327 node = 0; 380 node = 0;
328 for (num = 24; num < 32; num++) { 381 for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) {
329 if (!early_is_k8_nb(read_pci_config(0, num, 3, 0x00))) 382 int bus;
330 continue; 383 int dev_base, dev_limit;
331 384
332 iommu_detected = 1; 385 bus = bus_dev_ranges[i].bus;
333 gart_iommu_aperture = 1; 386 dev_base = bus_dev_ranges[i].dev_base;
334 387 dev_limit = bus_dev_ranges[i].dev_limit;
335 aper_order = (read_pci_config(0, num, 3, 0x90) >> 1) & 7; 388
336 aper_size = (32 * 1024 * 1024) << aper_order; 389 for (slot = dev_base; slot < dev_limit; slot++) {
337 aper_base = read_pci_config(0, num, 3, 0x94) & 0x7fff; 390 if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00)))
338 aper_base <<= 25; 391 continue;
339 392
340 printk(KERN_INFO "Node %d: aperture @ %Lx size %u MB\n", 393 iommu_detected = 1;
341 node, aper_base, aper_size >> 20); 394 gart_iommu_aperture = 1;
342 node++; 395
343 396 aper_order = (read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL) >> 1) & 7;
344 if (!aperture_valid(aper_base, aper_size)) { 397 aper_size = (32 * 1024 * 1024) << aper_order;
345 fix = 1; 398 aper_base = read_pci_config(bus, slot, 3, AMD64_GARTAPERTUREBASE) & 0x7fff;
346 break; 399 aper_base <<= 25;
347 } 400
401 printk(KERN_INFO "Node %d: aperture @ %Lx size %u MB\n",
402 node, aper_base, aper_size >> 20);
403 node++;
404
405 if (!aperture_valid(aper_base, aper_size, 64<<20)) {
406 if (valid_agp && agp_aper_base &&
407 agp_aper_base == aper_base &&
408 agp_aper_order == aper_order) {
 409 /* the same setting from both NB and AGP */
410 if (!no_iommu &&
411 max_pfn > MAX_DMA32_PFN &&
412 !printed_gart_size_msg) {
413 printk(KERN_ERR "you are using iommu with agp, but GART size is less than 64M\n");
414 printk(KERN_ERR "please increase GART size in your BIOS setup\n");
415 printk(KERN_ERR "if BIOS doesn't have that option, contact your HW vendor!\n");
416 printed_gart_size_msg = 1;
417 }
418 } else {
419 fix = 1;
420 goto out;
421 }
422 }
348 423
349 if ((last_aper_order && aper_order != last_aper_order) || 424 if ((last_aper_order && aper_order != last_aper_order) ||
350 (last_aper_base && aper_base != last_aper_base)) { 425 (last_aper_base && aper_base != last_aper_base)) {
351 fix = 1; 426 fix = 1;
352 break; 427 goto out;
428 }
429 last_aper_order = aper_order;
430 last_aper_base = aper_base;
353 } 431 }
354 last_aper_order = aper_order;
355 last_aper_base = aper_base;
356 } 432 }
357 433
434out:
358 if (!fix && !fallback_aper_force) { 435 if (!fix && !fallback_aper_force) {
359 if (last_aper_base) { 436 if (last_aper_base) {
360 unsigned long n = (32 * 1024 * 1024) << last_aper_order; 437 unsigned long n = (32 * 1024 * 1024) << last_aper_order;
@@ -364,14 +441,16 @@ void __init gart_iommu_hole_init(void)
364 return; 441 return;
365 } 442 }
366 443
367 if (!fallback_aper_force) 444 if (!fallback_aper_force) {
368 aper_alloc = search_agp_bridge(&aper_order, &valid_agp); 445 aper_alloc = agp_aper_base;
446 aper_order = agp_aper_order;
447 }
369 448
370 if (aper_alloc) { 449 if (aper_alloc) {
371 /* Got the aperture from the AGP bridge */ 450 /* Got the aperture from the AGP bridge */
372 } else if (swiotlb && !valid_agp) { 451 } else if (swiotlb && !valid_agp) {
373 /* Do nothing */ 452 /* Do nothing */
374 } else if ((!no_iommu && end_pfn > MAX_DMA32_PFN) || 453 } else if ((!no_iommu && max_pfn > MAX_DMA32_PFN) ||
375 force_iommu || 454 force_iommu ||
376 valid_agp || 455 valid_agp ||
377 fallback_aper_force) { 456 fallback_aper_force) {
@@ -401,16 +480,24 @@ void __init gart_iommu_hole_init(void)
401 } 480 }
402 481
403 /* Fix up the north bridges */ 482 /* Fix up the north bridges */
404 for (num = 24; num < 32; num++) { 483 for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) {
405 if (!early_is_k8_nb(read_pci_config(0, num, 3, 0x00))) 484 int bus;
406 continue; 485 int dev_base, dev_limit;
407 486
408 /* 487 bus = bus_dev_ranges[i].bus;
409 * Don't enable translation yet. That is done later. 488 dev_base = bus_dev_ranges[i].dev_base;
410 * Assume this BIOS didn't initialise the GART so 489 dev_limit = bus_dev_ranges[i].dev_limit;
411 * just overwrite all previous bits 490 for (slot = dev_base; slot < dev_limit; slot++) {
412 */ 491 if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00)))
413 write_pci_config(0, num, 3, 0x90, aper_order<<1); 492 continue;
414 write_pci_config(0, num, 3, 0x94, aper_alloc>>25); 493
494 /* Don't enable translation yet. That is done later.
495 Assume this BIOS didn't initialise the GART so
496 just overwrite all previous bits */
497 write_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL, aper_order << 1);
498 write_pci_config(bus, slot, 3, AMD64_GARTAPERTUREBASE, aper_alloc >> 25);
499 }
415 } 500 }
501
502 set_up_gart_resume(aper_order, aper_alloc);
416} 503}
diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index 4b99b1bdeb6c..3e58b676d23b 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -52,30 +52,41 @@
52 52
53unsigned long mp_lapic_addr; 53unsigned long mp_lapic_addr;
54 54
55DEFINE_PER_CPU(u16, x86_bios_cpu_apicid) = BAD_APICID;
56EXPORT_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
57
58/* 55/*
59 * Knob to control our willingness to enable the local APIC. 56 * Knob to control our willingness to enable the local APIC.
60 * 57 *
61 * -1=force-disable, +1=force-enable 58 * +1=force-enable
62 */ 59 */
63static int enable_local_apic __initdata; 60static int force_enable_local_apic;
61int disable_apic;
64 62
65/* Local APIC timer verification ok */ 63/* Local APIC timer verification ok */
66static int local_apic_timer_verify_ok; 64static int local_apic_timer_verify_ok;
67/* Disable local APIC timer from the kernel commandline or via dmi quirk 65/* Disable local APIC timer from the kernel commandline or via dmi quirk */
68 or using CPU MSR check */ 66static int local_apic_timer_disabled;
69int local_apic_timer_disabled;
70/* Local APIC timer works in C2 */ 67/* Local APIC timer works in C2 */
71int local_apic_timer_c2_ok; 68int local_apic_timer_c2_ok;
72EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok); 69EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);
73 70
71int first_system_vector = 0xfe;
72
73char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE};
74
74/* 75/*
75 * Debug level, exported for io_apic.c 76 * Debug level, exported for io_apic.c
76 */ 77 */
77int apic_verbosity; 78int apic_verbosity;
78 79
80int pic_mode;
81
82/* Have we found an MP table */
83int smp_found_config;
84
85static struct resource lapic_resource = {
86 .name = "Local APIC",
87 .flags = IORESOURCE_MEM | IORESOURCE_BUSY,
88};
89
79static unsigned int calibration_result; 90static unsigned int calibration_result;
80 91
81static int lapic_next_event(unsigned long delta, 92static int lapic_next_event(unsigned long delta,
@@ -545,7 +556,7 @@ void __init setup_boot_APIC_clock(void)
545 lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY; 556 lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
546 else 557 else
547 printk(KERN_WARNING "APIC timer registered as dummy," 558 printk(KERN_WARNING "APIC timer registered as dummy,"
548 " due to nmi_watchdog=1!\n"); 559 " due to nmi_watchdog=%d!\n", nmi_watchdog);
549 } 560 }
550 561
551 /* Setup the lapic or request the broadcast */ 562 /* Setup the lapic or request the broadcast */
@@ -963,7 +974,7 @@ void __cpuinit setup_local_APIC(void)
963 * Double-check whether this APIC is really registered. 974 * Double-check whether this APIC is really registered.
964 */ 975 */
965 if (!apic_id_registered()) 976 if (!apic_id_registered())
966 BUG(); 977 WARN_ON_ONCE(1);
967 978
968 /* 979 /*
969 * Intel recommends to set DFR, LDR and TPR before enabling 980 * Intel recommends to set DFR, LDR and TPR before enabling
@@ -1094,7 +1105,7 @@ static int __init detect_init_APIC(void)
1094 u32 h, l, features; 1105 u32 h, l, features;
1095 1106
1096 /* Disabled by kernel option? */ 1107 /* Disabled by kernel option? */
1097 if (enable_local_apic < 0) 1108 if (disable_apic)
1098 return -1; 1109 return -1;
1099 1110
1100 switch (boot_cpu_data.x86_vendor) { 1111 switch (boot_cpu_data.x86_vendor) {
@@ -1117,7 +1128,7 @@ static int __init detect_init_APIC(void)
1117 * Over-ride BIOS and try to enable the local APIC only if 1128 * Over-ride BIOS and try to enable the local APIC only if
1118 * "lapic" specified. 1129 * "lapic" specified.
1119 */ 1130 */
1120 if (enable_local_apic <= 0) { 1131 if (!force_enable_local_apic) {
1121 printk(KERN_INFO "Local APIC disabled by BIOS -- " 1132 printk(KERN_INFO "Local APIC disabled by BIOS -- "
1122 "you can enable it with \"lapic\"\n"); 1133 "you can enable it with \"lapic\"\n");
1123 return -1; 1134 return -1;
@@ -1154,9 +1165,6 @@ static int __init detect_init_APIC(void)
1154 if (l & MSR_IA32_APICBASE_ENABLE) 1165 if (l & MSR_IA32_APICBASE_ENABLE)
1155 mp_lapic_addr = l & MSR_IA32_APICBASE_BASE; 1166 mp_lapic_addr = l & MSR_IA32_APICBASE_BASE;
1156 1167
1157 if (nmi_watchdog != NMI_NONE && nmi_watchdog != NMI_DISABLED)
1158 nmi_watchdog = NMI_LOCAL_APIC;
1159
1160 printk(KERN_INFO "Found and enabled local APIC!\n"); 1168 printk(KERN_INFO "Found and enabled local APIC!\n");
1161 1169
1162 apic_pm_activate(); 1170 apic_pm_activate();
@@ -1195,36 +1203,6 @@ void __init init_apic_mappings(void)
1195 if (boot_cpu_physical_apicid == -1U) 1203 if (boot_cpu_physical_apicid == -1U)
1196 boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); 1204 boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id());
1197 1205
1198#ifdef CONFIG_X86_IO_APIC
1199 {
1200 unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
1201 int i;
1202
1203 for (i = 0; i < nr_ioapics; i++) {
1204 if (smp_found_config) {
1205 ioapic_phys = mp_ioapics[i].mpc_apicaddr;
1206 if (!ioapic_phys) {
1207 printk(KERN_ERR
1208 "WARNING: bogus zero IO-APIC "
1209 "address found in MPTABLE, "
1210 "disabling IO/APIC support!\n");
1211 smp_found_config = 0;
1212 skip_ioapic_setup = 1;
1213 goto fake_ioapic_page;
1214 }
1215 } else {
1216fake_ioapic_page:
1217 ioapic_phys = (unsigned long)
1218 alloc_bootmem_pages(PAGE_SIZE);
1219 ioapic_phys = __pa(ioapic_phys);
1220 }
1221 set_fixmap_nocache(idx, ioapic_phys);
1222 printk(KERN_DEBUG "mapped IOAPIC to %08lx (%08lx)\n",
1223 __fix_to_virt(idx), ioapic_phys);
1224 idx++;
1225 }
1226 }
1227#endif
1228} 1206}
1229 1207
1230/* 1208/*
@@ -1236,7 +1214,7 @@ int apic_version[MAX_APICS];
1236 1214
1237int __init APIC_init_uniprocessor(void) 1215int __init APIC_init_uniprocessor(void)
1238{ 1216{
1239 if (enable_local_apic < 0) 1217 if (disable_apic)
1240 clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); 1218 clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
1241 1219
1242 if (!smp_found_config && !cpu_has_apic) 1220 if (!smp_found_config && !cpu_has_apic)
@@ -1265,10 +1243,14 @@ int __init APIC_init_uniprocessor(void)
1265#ifdef CONFIG_CRASH_DUMP 1243#ifdef CONFIG_CRASH_DUMP
1266 boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); 1244 boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id());
1267#endif 1245#endif
1268 phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid); 1246 physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
1269 1247
1270 setup_local_APIC(); 1248 setup_local_APIC();
1271 1249
1250#ifdef CONFIG_X86_IO_APIC
1251 if (!smp_found_config || skip_ioapic_setup || !nr_ioapics)
1252#endif
1253 localise_nmi_watchdog();
1272 end_local_APIC_setup(); 1254 end_local_APIC_setup();
1273#ifdef CONFIG_X86_IO_APIC 1255#ifdef CONFIG_X86_IO_APIC
1274 if (smp_found_config) 1256 if (smp_found_config)
@@ -1351,13 +1333,13 @@ void __init smp_intr_init(void)
1351 * The reschedule interrupt is a CPU-to-CPU reschedule-helper 1333 * The reschedule interrupt is a CPU-to-CPU reschedule-helper
1352 * IPI, driven by wakeup. 1334 * IPI, driven by wakeup.
1353 */ 1335 */
1354 set_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt); 1336 alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
1355 1337
1356 /* IPI for invalidation */ 1338 /* IPI for invalidation */
1357 set_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt); 1339 alloc_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt);
1358 1340
1359 /* IPI for generic function call */ 1341 /* IPI for generic function call */
1360 set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); 1342 alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
1361} 1343}
1362#endif 1344#endif
1363 1345
@@ -1370,15 +1352,15 @@ void __init apic_intr_init(void)
1370 smp_intr_init(); 1352 smp_intr_init();
1371#endif 1353#endif
1372 /* self generated IPI for local APIC timer */ 1354 /* self generated IPI for local APIC timer */
1373 set_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt); 1355 alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
1374 1356
1375 /* IPI vectors for APIC spurious and error interrupts */ 1357 /* IPI vectors for APIC spurious and error interrupts */
1376 set_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); 1358 alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
1377 set_intr_gate(ERROR_APIC_VECTOR, error_interrupt); 1359 alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
1378 1360
1379 /* thermal monitor LVT interrupt */ 1361 /* thermal monitor LVT interrupt */
1380#ifdef CONFIG_X86_MCE_P4THERMAL 1362#ifdef CONFIG_X86_MCE_P4THERMAL
1381 set_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); 1363 alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
1382#endif 1364#endif
1383} 1365}
1384 1366
@@ -1513,6 +1495,9 @@ void __cpuinit generic_processor_info(int apicid, int version)
1513 */ 1495 */
1514 cpu = 0; 1496 cpu = 0;
1515 1497
1498 if (apicid > max_physical_apicid)
1499 max_physical_apicid = apicid;
1500
1516 /* 1501 /*
1517 * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y 1502 * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y
1518 * but we need to work other dependencies like SMP_SUSPEND etc 1503 * but we need to work other dependencies like SMP_SUSPEND etc
@@ -1520,7 +1505,7 @@ void __cpuinit generic_processor_info(int apicid, int version)
1520 * if (CPU_HOTPLUG_ENABLED || num_processors > 8) 1505 * if (CPU_HOTPLUG_ENABLED || num_processors > 8)
1521 * - Ashok Raj <ashok.raj@intel.com> 1506 * - Ashok Raj <ashok.raj@intel.com>
1522 */ 1507 */
1523 if (num_processors > 8) { 1508 if (max_physical_apicid >= 8) {
1524 switch (boot_cpu_data.x86_vendor) { 1509 switch (boot_cpu_data.x86_vendor) {
1525 case X86_VENDOR_INTEL: 1510 case X86_VENDOR_INTEL:
1526 if (!APIC_XAPIC(version)) { 1511 if (!APIC_XAPIC(version)) {
@@ -1534,9 +1519,9 @@ void __cpuinit generic_processor_info(int apicid, int version)
1534 } 1519 }
1535#ifdef CONFIG_SMP 1520#ifdef CONFIG_SMP
1536 /* are we being called early in kernel startup? */ 1521 /* are we being called early in kernel startup? */
1537 if (x86_cpu_to_apicid_early_ptr) { 1522 if (early_per_cpu_ptr(x86_cpu_to_apicid)) {
1538 u16 *cpu_to_apicid = x86_cpu_to_apicid_early_ptr; 1523 u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);
1539 u16 *bios_cpu_apicid = x86_bios_cpu_apicid_early_ptr; 1524 u16 *bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
1540 1525
1541 cpu_to_apicid[cpu] = apicid; 1526 cpu_to_apicid[cpu] = apicid;
1542 bios_cpu_apicid[cpu] = apicid; 1527 bios_cpu_apicid[cpu] = apicid;
@@ -1703,14 +1688,14 @@ static void apic_pm_activate(void) { }
1703 */ 1688 */
1704static int __init parse_lapic(char *arg) 1689static int __init parse_lapic(char *arg)
1705{ 1690{
1706 enable_local_apic = 1; 1691 force_enable_local_apic = 1;
1707 return 0; 1692 return 0;
1708} 1693}
1709early_param("lapic", parse_lapic); 1694early_param("lapic", parse_lapic);
1710 1695
1711static int __init parse_nolapic(char *arg) 1696static int __init parse_nolapic(char *arg)
1712{ 1697{
1713 enable_local_apic = -1; 1698 disable_apic = 1;
1714 clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); 1699 clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
1715 return 0; 1700 return 0;
1716} 1701}
@@ -1740,3 +1725,21 @@ static int __init apic_set_verbosity(char *str)
1740} 1725}
1741__setup("apic=", apic_set_verbosity); 1726__setup("apic=", apic_set_verbosity);
1742 1727
1728static int __init lapic_insert_resource(void)
1729{
1730 if (!apic_phys)
1731 return -1;
1732
1733 /* Put local APIC into the resource map. */
1734 lapic_resource.start = apic_phys;
1735 lapic_resource.end = lapic_resource.start + PAGE_SIZE - 1;
1736 insert_resource(&iomem_resource, &lapic_resource);
1737
1738 return 0;
1739}
1740
1741/*
1742 * need to call insert_resource() after e820_reserve_resources(),
1743 * which uses request_resource
1744 */
1745late_initcall(lapic_insert_resource);
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index 0633cfd0dc29..1e3d32e27c14 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -43,7 +43,7 @@
43#include <mach_ipi.h> 43#include <mach_ipi.h>
44#include <mach_apic.h> 44#include <mach_apic.h>
45 45
46int disable_apic_timer __cpuinitdata; 46static int disable_apic_timer __cpuinitdata;
47static int apic_calibrate_pmtmr __initdata; 47static int apic_calibrate_pmtmr __initdata;
48int disable_apic; 48int disable_apic;
49 49
@@ -56,6 +56,9 @@ EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);
56 */ 56 */
57int apic_verbosity; 57int apic_verbosity;
58 58
59/* Have we found an MP table */
60int smp_found_config;
61
59static struct resource lapic_resource = { 62static struct resource lapic_resource = {
60 .name = "Local APIC", 63 .name = "Local APIC",
61 .flags = IORESOURCE_MEM | IORESOURCE_BUSY, 64 .flags = IORESOURCE_MEM | IORESOURCE_BUSY,
@@ -87,9 +90,6 @@ static unsigned long apic_phys;
87 90
88unsigned long mp_lapic_addr; 91unsigned long mp_lapic_addr;
89 92
90DEFINE_PER_CPU(u16, x86_bios_cpu_apicid) = BAD_APICID;
91EXPORT_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
92
93unsigned int __cpuinitdata maxcpus = NR_CPUS; 93unsigned int __cpuinitdata maxcpus = NR_CPUS;
94/* 94/*
95 * Get the LAPIC version 95 * Get the LAPIC version
@@ -417,37 +417,13 @@ void __init setup_boot_APIC_clock(void)
417 lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY; 417 lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
418 else 418 else
419 printk(KERN_WARNING "APIC timer registered as dummy," 419 printk(KERN_WARNING "APIC timer registered as dummy,"
420 " due to nmi_watchdog=1!\n"); 420 " due to nmi_watchdog=%d!\n", nmi_watchdog);
421 421
422 setup_APIC_timer(); 422 setup_APIC_timer();
423} 423}
424 424
425/*
426 * AMD C1E enabled CPUs have a real nasty problem: Some BIOSes set the
427 * C1E flag only in the secondary CPU, so when we detect the wreckage
428 * we already have enabled the boot CPU local apic timer. Check, if
429 * disable_apic_timer is set and the DUMMY flag is cleared. If yes,
430 * set the DUMMY flag again and force the broadcast mode in the
431 * clockevents layer.
432 */
433static void __cpuinit check_boot_apic_timer_broadcast(void)
434{
435 if (!disable_apic_timer ||
436 (lapic_clockevent.features & CLOCK_EVT_FEAT_DUMMY))
437 return;
438
439 printk(KERN_INFO "AMD C1E detected late. Force timer broadcast.\n");
440 lapic_clockevent.features |= CLOCK_EVT_FEAT_DUMMY;
441
442 local_irq_enable();
443 clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_FORCE,
444 &boot_cpu_physical_apicid);
445 local_irq_disable();
446}
447
448void __cpuinit setup_secondary_APIC_clock(void) 425void __cpuinit setup_secondary_APIC_clock(void)
449{ 426{
450 check_boot_apic_timer_broadcast();
451 setup_APIC_timer(); 427 setup_APIC_timer();
452} 428}
453 429
@@ -850,7 +826,6 @@ static void __cpuinit lapic_setup_esr(void)
850void __cpuinit end_local_APIC_setup(void) 826void __cpuinit end_local_APIC_setup(void)
851{ 827{
852 lapic_setup_esr(); 828 lapic_setup_esr();
853 nmi_watchdog_default();
854 setup_apic_nmi_watchdog(NULL); 829 setup_apic_nmi_watchdog(NULL);
855 apic_pm_activate(); 830 apic_pm_activate();
856} 831}
@@ -875,7 +850,7 @@ static int __init detect_init_APIC(void)
875 850
876void __init early_init_lapic_mapping(void) 851void __init early_init_lapic_mapping(void)
877{ 852{
878 unsigned long apic_phys; 853 unsigned long phys_addr;
879 854
880 /* 855 /*
881 * If no local APIC can be found then go out 856 * If no local APIC can be found then go out
@@ -884,11 +859,11 @@ void __init early_init_lapic_mapping(void)
884 if (!smp_found_config) 859 if (!smp_found_config)
885 return; 860 return;
886 861
887 apic_phys = mp_lapic_addr; 862 phys_addr = mp_lapic_addr;
888 863
889 set_fixmap_nocache(FIX_APIC_BASE, apic_phys); 864 set_fixmap_nocache(FIX_APIC_BASE, phys_addr);
890 apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n", 865 apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n",
891 APIC_BASE, apic_phys); 866 APIC_BASE, phys_addr);
892 867
893 /* 868 /*
894 * Fetch the APIC ID of the BSP in case we have a 869 * Fetch the APIC ID of the BSP in case we have a
@@ -942,7 +917,9 @@ int __init APIC_init_uniprocessor(void)
942 917
943 verify_local_APIC(); 918 verify_local_APIC();
944 919
945 phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid); 920 connect_bsp_APIC();
921
922 physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
946 apic_write(APIC_ID, SET_APIC_ID(boot_cpu_physical_apicid)); 923 apic_write(APIC_ID, SET_APIC_ID(boot_cpu_physical_apicid));
947 924
948 setup_local_APIC(); 925 setup_local_APIC();
@@ -954,6 +931,8 @@ int __init APIC_init_uniprocessor(void)
954 if (!skip_ioapic_setup && nr_ioapics) 931 if (!skip_ioapic_setup && nr_ioapics)
955 enable_IO_APIC(); 932 enable_IO_APIC();
956 933
934 if (!smp_found_config || skip_ioapic_setup || !nr_ioapics)
935 localise_nmi_watchdog();
957 end_local_APIC_setup(); 936 end_local_APIC_setup();
958 937
959 if (smp_found_config && !skip_ioapic_setup && nr_ioapics) 938 if (smp_found_config && !skip_ioapic_setup && nr_ioapics)
@@ -1021,6 +1000,14 @@ asmlinkage void smp_error_interrupt(void)
1021 irq_exit(); 1000 irq_exit();
1022} 1001}
1023 1002
1003/**
1004 * connect_bsp_APIC - attach the APIC to the interrupt system
1005 */
1006void __init connect_bsp_APIC(void)
1007{
1008 enable_apic_mode();
1009}
1010
1024void disconnect_bsp_APIC(int virt_wire_setup) 1011void disconnect_bsp_APIC(int virt_wire_setup)
1025{ 1012{
1026 /* Go back to Virtual Wire compatibility mode */ 1013 /* Go back to Virtual Wire compatibility mode */
@@ -1090,10 +1077,13 @@ void __cpuinit generic_processor_info(int apicid, int version)
1090 */ 1077 */
1091 cpu = 0; 1078 cpu = 0;
1092 } 1079 }
1080 if (apicid > max_physical_apicid)
1081 max_physical_apicid = apicid;
1082
1093 /* are we being called early in kernel startup? */ 1083 /* are we being called early in kernel startup? */
1094 if (x86_cpu_to_apicid_early_ptr) { 1084 if (early_per_cpu_ptr(x86_cpu_to_apicid)) {
1095 u16 *cpu_to_apicid = x86_cpu_to_apicid_early_ptr; 1085 u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);
1096 u16 *bios_cpu_apicid = x86_bios_cpu_apicid_early_ptr; 1086 u16 *bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
1097 1087
1098 cpu_to_apicid[cpu] = apicid; 1088 cpu_to_apicid[cpu] = apicid;
1099 bios_cpu_apicid[cpu] = apicid; 1089 bios_cpu_apicid[cpu] = apicid;
@@ -1269,7 +1259,7 @@ __cpuinit int apic_is_clustered_box(void)
1269 if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && !is_vsmp_box()) 1259 if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && !is_vsmp_box())
1270 return 0; 1260 return 0;
1271 1261
1272 bios_cpu_apicid = x86_bios_cpu_apicid_early_ptr; 1262 bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
1273 bitmap_zero(clustermap, NUM_APIC_CLUSTERS); 1263 bitmap_zero(clustermap, NUM_APIC_CLUSTERS);
1274 1264
1275 for (i = 0; i < NR_CPUS; i++) { 1265 for (i = 0; i < NR_CPUS; i++) {
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index bf9290e29013..00e6d1370954 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -228,6 +228,7 @@
228#include <linux/suspend.h> 228#include <linux/suspend.h>
229#include <linux/kthread.h> 229#include <linux/kthread.h>
230#include <linux/jiffies.h> 230#include <linux/jiffies.h>
231#include <linux/smp_lock.h>
231 232
232#include <asm/system.h> 233#include <asm/system.h>
233#include <asm/uaccess.h> 234#include <asm/uaccess.h>
@@ -1149,7 +1150,7 @@ static void queue_event(apm_event_t event, struct apm_user *sender)
1149 as->event_tail = 0; 1150 as->event_tail = 0;
1150 } 1151 }
1151 as->events[as->event_head] = event; 1152 as->events[as->event_head] = event;
1152 if ((!as->suser) || (!as->writer)) 1153 if (!as->suser || !as->writer)
1153 continue; 1154 continue;
1154 switch (event) { 1155 switch (event) {
1155 case APM_SYS_SUSPEND: 1156 case APM_SYS_SUSPEND:
@@ -1396,7 +1397,7 @@ static void apm_mainloop(void)
1396 1397
1397static int check_apm_user(struct apm_user *as, const char *func) 1398static int check_apm_user(struct apm_user *as, const char *func)
1398{ 1399{
1399 if ((as == NULL) || (as->magic != APM_BIOS_MAGIC)) { 1400 if (as == NULL || as->magic != APM_BIOS_MAGIC) {
1400 printk(KERN_ERR "apm: %s passed bad filp\n", func); 1401 printk(KERN_ERR "apm: %s passed bad filp\n", func);
1401 return 1; 1402 return 1;
1402 } 1403 }
@@ -1459,18 +1460,19 @@ static unsigned int do_poll(struct file *fp, poll_table *wait)
1459 return 0; 1460 return 0;
1460} 1461}
1461 1462
1462static int do_ioctl(struct inode *inode, struct file *filp, 1463static long do_ioctl(struct file *filp, u_int cmd, u_long arg)
1463 u_int cmd, u_long arg)
1464{ 1464{
1465 struct apm_user *as; 1465 struct apm_user *as;
1466 int ret;
1466 1467
1467 as = filp->private_data; 1468 as = filp->private_data;
1468 if (check_apm_user(as, "ioctl")) 1469 if (check_apm_user(as, "ioctl"))
1469 return -EIO; 1470 return -EIO;
1470 if ((!as->suser) || (!as->writer)) 1471 if (!as->suser || !as->writer)
1471 return -EPERM; 1472 return -EPERM;
1472 switch (cmd) { 1473 switch (cmd) {
1473 case APM_IOC_STANDBY: 1474 case APM_IOC_STANDBY:
1475 lock_kernel();
1474 if (as->standbys_read > 0) { 1476 if (as->standbys_read > 0) {
1475 as->standbys_read--; 1477 as->standbys_read--;
1476 as->standbys_pending--; 1478 as->standbys_pending--;
@@ -1479,8 +1481,10 @@ static int do_ioctl(struct inode *inode, struct file *filp,
1479 queue_event(APM_USER_STANDBY, as); 1481 queue_event(APM_USER_STANDBY, as);
1480 if (standbys_pending <= 0) 1482 if (standbys_pending <= 0)
1481 standby(); 1483 standby();
1484 unlock_kernel();
1482 break; 1485 break;
1483 case APM_IOC_SUSPEND: 1486 case APM_IOC_SUSPEND:
1487 lock_kernel();
1484 if (as->suspends_read > 0) { 1488 if (as->suspends_read > 0) {
1485 as->suspends_read--; 1489 as->suspends_read--;
1486 as->suspends_pending--; 1490 as->suspends_pending--;
@@ -1488,16 +1492,17 @@ static int do_ioctl(struct inode *inode, struct file *filp,
1488 } else 1492 } else
1489 queue_event(APM_USER_SUSPEND, as); 1493 queue_event(APM_USER_SUSPEND, as);
1490 if (suspends_pending <= 0) { 1494 if (suspends_pending <= 0) {
1491 return suspend(1); 1495 ret = suspend(1);
1492 } else { 1496 } else {
1493 as->suspend_wait = 1; 1497 as->suspend_wait = 1;
1494 wait_event_interruptible(apm_suspend_waitqueue, 1498 wait_event_interruptible(apm_suspend_waitqueue,
1495 as->suspend_wait == 0); 1499 as->suspend_wait == 0);
1496 return as->suspend_result; 1500 ret = as->suspend_result;
1497 } 1501 }
1498 break; 1502 unlock_kernel();
1503 return ret;
1499 default: 1504 default:
1500 return -EINVAL; 1505 return -ENOTTY;
1501 } 1506 }
1502 return 0; 1507 return 0;
1503} 1508}
@@ -1860,7 +1865,7 @@ static const struct file_operations apm_bios_fops = {
1860 .owner = THIS_MODULE, 1865 .owner = THIS_MODULE,
1861 .read = do_read, 1866 .read = do_read,
1862 .poll = do_poll, 1867 .poll = do_poll,
1863 .ioctl = do_ioctl, 1868 .unlocked_ioctl = do_ioctl,
1864 .open = do_open, 1869 .open = do_open,
1865 .release = do_release, 1870 .release = do_release,
1866}; 1871};
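
The apm_32.c hunks are a Big Kernel Lock push-down: the file_operations entry moves from .ioctl (invoked with the BKL held) to .unlocked_ioctl, the handler takes lock_kernel()/unlock_kernel() itself around only the commands that need it, and unknown commands now return -ENOTTY instead of -EINVAL. A userspace analogue of the push-down, with invented names and a pthread mutex standing in for the BKL:

#include <pthread.h>
#include <stdio.h>
#include <errno.h>

/* Invented example: big_lock plays the role of the BKL. */
static pthread_mutex_t big_lock = PTHREAD_MUTEX_INITIALIZER;

/* Before the push-down, the dispatcher held big_lock for every command;
 * after it, the handler locks only the paths that need it. */
static long dev_ioctl(unsigned int cmd, unsigned long arg)
{
    long ret = 0;

    switch (cmd) {
    case 1:                         /* state-changing command */
        pthread_mutex_lock(&big_lock);
        ret = (long)arg;            /* ... do the work ... */
        pthread_mutex_unlock(&big_lock);
        break;
    default:
        return -ENOTTY;             /* unknown command */
    }
    return ret;
}

int main(void)
{
    printf("cmd 1 -> %ld, cmd 9 -> %ld\n",
           dev_ioctl(1, 42), dev_ioctl(9, 0));
    return 0;
}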
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index 92588083950f..6649d09ad88f 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -111,7 +111,7 @@ void foo(void)
111 OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable); 111 OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable);
112 OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable); 112 OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable);
113 OFFSET(PV_CPU_iret, pv_cpu_ops, iret); 113 OFFSET(PV_CPU_iret, pv_cpu_ops, iret);
114 OFFSET(PV_CPU_irq_enable_syscall_ret, pv_cpu_ops, irq_enable_syscall_ret); 114 OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit);
115 OFFSET(PV_CPU_read_cr0, pv_cpu_ops, read_cr0); 115 OFFSET(PV_CPU_read_cr0, pv_cpu_ops, read_cr0);
116#endif 116#endif
117 117
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index f126c05d6170..bacf5deeec2d 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -34,7 +34,7 @@ int main(void)
34 ENTRY(pid); 34 ENTRY(pid);
35 BLANK(); 35 BLANK();
36#undef ENTRY 36#undef ENTRY
37#define ENTRY(entry) DEFINE(threadinfo_ ## entry, offsetof(struct thread_info, entry)) 37#define ENTRY(entry) DEFINE(TI_ ## entry, offsetof(struct thread_info, entry))
38 ENTRY(flags); 38 ENTRY(flags);
39 ENTRY(addr_limit); 39 ENTRY(addr_limit);
40 ENTRY(preempt_count); 40 ENTRY(preempt_count);
@@ -61,8 +61,11 @@ int main(void)
61 OFFSET(PARAVIRT_PATCH_pv_irq_ops, paravirt_patch_template, pv_irq_ops); 61 OFFSET(PARAVIRT_PATCH_pv_irq_ops, paravirt_patch_template, pv_irq_ops);
62 OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable); 62 OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable);
63 OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable); 63 OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable);
64 OFFSET(PV_IRQ_adjust_exception_frame, pv_irq_ops, adjust_exception_frame);
64 OFFSET(PV_CPU_iret, pv_cpu_ops, iret); 65 OFFSET(PV_CPU_iret, pv_cpu_ops, iret);
65 OFFSET(PV_CPU_irq_enable_syscall_ret, pv_cpu_ops, irq_enable_syscall_ret); 66 OFFSET(PV_CPU_usergs_sysret32, pv_cpu_ops, usergs_sysret32);
67 OFFSET(PV_CPU_usergs_sysret64, pv_cpu_ops, usergs_sysret64);
68 OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit);
66 OFFSET(PV_CPU_swapgs, pv_cpu_ops, swapgs); 69 OFFSET(PV_CPU_swapgs, pv_cpu_ops, swapgs);
67 OFFSET(PV_MMU_read_cr2, pv_mmu_ops, read_cr2); 70 OFFSET(PV_MMU_read_cr2, pv_mmu_ops, read_cr2);
68#endif 71#endif
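
For context on the asm-offsets hunks: these files are compiled to assembly only, and each DEFINE()/OFFSET() emits a marker the build turns into a #define in asm-offsets.h, so assembly sources can use structure offsets (TI_flags, PV_CPU_irq_enable_sysexit, ...) without knowing the C layout. A userspace sketch of the underlying offsetof() bookkeeping, with an invented struct:

#include <stdio.h>
#include <stddef.h>

/* Invented layout, standing in for struct thread_info. */
struct thread_info_like {
    unsigned long flags;
    unsigned long addr_limit;
    int preempt_count;
};

/* Print the offsets these macros would export to assembly. */
#define OFFSET(sym, str, mem) \
    printf(#sym " = %zu\n", offsetof(struct str, mem))

int main(void)
{
    OFFSET(TI_flags, thread_info_like, flags);
    OFFSET(TI_addr_limit, thread_info_like, addr_limit);
    OFFSET(TI_preempt_count, thread_info_like, preempt_count);
    return 0;
}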
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index a0c6f8190887..ee76eaad3001 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -6,11 +6,15 @@ obj-y := intel_cacheinfo.o addon_cpuid_features.o
6obj-y += proc.o feature_names.o 6obj-y += proc.o feature_names.o
7 7
8obj-$(CONFIG_X86_32) += common.o bugs.o 8obj-$(CONFIG_X86_32) += common.o bugs.o
9obj-$(CONFIG_X86_64) += common_64.o bugs_64.o
9obj-$(CONFIG_X86_32) += amd.o 10obj-$(CONFIG_X86_32) += amd.o
11obj-$(CONFIG_X86_64) += amd_64.o
10obj-$(CONFIG_X86_32) += cyrix.o 12obj-$(CONFIG_X86_32) += cyrix.o
11obj-$(CONFIG_X86_32) += centaur.o 13obj-$(CONFIG_X86_32) += centaur.o
14obj-$(CONFIG_X86_64) += centaur_64.o
12obj-$(CONFIG_X86_32) += transmeta.o 15obj-$(CONFIG_X86_32) += transmeta.o
13obj-$(CONFIG_X86_32) += intel.o 16obj-$(CONFIG_X86_32) += intel.o
17obj-$(CONFIG_X86_64) += intel_64.o
14obj-$(CONFIG_X86_32) += umc.o 18obj-$(CONFIG_X86_32) += umc.o
15 19
16obj-$(CONFIG_X86_MCE) += mcheck/ 20obj-$(CONFIG_X86_MCE) += mcheck/
diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c
index c2e1ce33c7cb..84a8220a6072 100644
--- a/arch/x86/kernel/cpu/addon_cpuid_features.c
+++ b/arch/x86/kernel/cpu/addon_cpuid_features.c
@@ -1,9 +1,7 @@
1
2/* 1/*
 3 * Routines to identify additional cpu features that are scattered in 2 * Routines to identify additional cpu features that are scattered in
4 * cpuid space. 3 * cpuid space.
5 */ 4 */
6
7#include <linux/cpu.h> 5#include <linux/cpu.h>
8 6
9#include <asm/pat.h> 7#include <asm/pat.h>
@@ -53,19 +51,20 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c)
53#ifdef CONFIG_X86_PAT 51#ifdef CONFIG_X86_PAT
54void __cpuinit validate_pat_support(struct cpuinfo_x86 *c) 52void __cpuinit validate_pat_support(struct cpuinfo_x86 *c)
55{ 53{
54 if (!cpu_has_pat)
55 pat_disable("PAT not supported by CPU.");
56
56 switch (c->x86_vendor) { 57 switch (c->x86_vendor) {
57 case X86_VENDOR_AMD:
58 if (c->x86 >= 0xf && c->x86 <= 0x11)
59 return;
60 break;
61 case X86_VENDOR_INTEL: 58 case X86_VENDOR_INTEL:
62 if (c->x86 == 0xF || (c->x86 == 6 && c->x86_model >= 15)) 59 if (c->x86 == 0xF || (c->x86 == 6 && c->x86_model >= 15))
63 return; 60 return;
64 break; 61 break;
62 case X86_VENDOR_AMD:
63 case X86_VENDOR_CENTAUR:
64 case X86_VENDOR_TRANSMETA:
65 return;
65 } 66 }
66 67
67 pat_disable(cpu_has_pat ? 68 pat_disable("PAT disabled. Not yet verified on this CPU type.");
68 "PAT disabled. Not yet verified on this CPU type." :
69 "PAT not supported by CPU.");
70} 69}
71#endif 70#endif
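The reordered validate_pat_support() now bails out first when the CPU lacks PAT, whitelists AMD, Centaur, and Transmeta outright, and still gates Intel on family and model. A condensed sketch of the resulting decision follows; the vendor constants mirror the kernel's, but the helper and its name are illustrative.

/* Sketch of the reworked PAT validation flow (simplified). */
enum { VENDOR_INTEL, VENDOR_AMD, VENDOR_CENTAUR, VENDOR_TRANSMETA, VENDOR_OTHER };

int pat_usable(int has_pat, int vendor, int family, int model)
{
        if (!has_pat)
                return 0;       /* "PAT not supported by CPU." */
        switch (vendor) {
        case VENDOR_INTEL:      /* verified: P4, or Core (6, model 15+) */
                return family == 0xf || (family == 6 && model >= 15);
        case VENDOR_AMD:
        case VENDOR_CENTAUR:
        case VENDOR_TRANSMETA:
                return 1;       /* whitelisted outright */
        }
        return 0;               /* "Not yet verified on this CPU type." */
}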
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 245866828294..81a07ca65d44 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -24,43 +24,6 @@
24extern void vide(void); 24extern void vide(void);
25__asm__(".align 4\nvide: ret"); 25__asm__(".align 4\nvide: ret");
26 26
27#ifdef CONFIG_X86_LOCAL_APIC
28#define ENABLE_C1E_MASK 0x18000000
29#define CPUID_PROCESSOR_SIGNATURE 1
30#define CPUID_XFAM 0x0ff00000
31#define CPUID_XFAM_K8 0x00000000
32#define CPUID_XFAM_10H 0x00100000
33#define CPUID_XFAM_11H 0x00200000
34#define CPUID_XMOD 0x000f0000
35#define CPUID_XMOD_REV_F 0x00040000
36
37/* AMD systems with C1E don't have a working lAPIC timer. Check for that. */
38static __cpuinit int amd_apic_timer_broken(void)
39{
40 u32 lo, hi;
41 u32 eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE);
42 switch (eax & CPUID_XFAM) {
43 case CPUID_XFAM_K8:
44 if ((eax & CPUID_XMOD) < CPUID_XMOD_REV_F)
45 break;
46 case CPUID_XFAM_10H:
47 case CPUID_XFAM_11H:
48 rdmsr(MSR_K8_ENABLE_C1E, lo, hi);
49 if (lo & ENABLE_C1E_MASK) {
50 if (smp_processor_id() != boot_cpu_physical_apicid)
51 printk(KERN_INFO "AMD C1E detected late. "
52 " Force timer broadcast.\n");
53 return 1;
54 }
55 break;
56 default:
57 /* err on the side of caution */
58 return 1;
59 }
60 return 0;
61}
62#endif
63
64int force_mwait __cpuinitdata; 27int force_mwait __cpuinitdata;
65 28
66static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) 29static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
@@ -297,11 +260,6 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
297 num_cache_leaves = 3; 260 num_cache_leaves = 3;
298 } 261 }
299 262
300#ifdef CONFIG_X86_LOCAL_APIC
301 if (amd_apic_timer_broken())
302 local_apic_timer_disabled = 1;
303#endif
304
305 /* K6s report MCEs but don't actually have all the MSRs */ 263 /* K6s report MCEs but don't actually have all the MSRs */
306 if (c->x86 < 6) 264 if (c->x86 < 6)
307 clear_cpu_cap(c, X86_FEATURE_MCE); 265 clear_cpu_cap(c, X86_FEATURE_MCE);
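The deleted amd_apic_timer_broken() keyed off the extended-family field of CPUID leaf 1 before probing MSR_K8_ENABLE_C1E. Here is a sketch of just that masking step, with mask values copied from the removed #defines; the helper name is hypothetical, and the 11H case plus the cautious default are omitted for brevity.

/* Sketch of the CPUID masking the removed C1E check relied on. */
#define CPUID_XFAM       0x0ff00000     /* extended family field */
#define CPUID_XFAM_K8    0x00000000
#define CPUID_XFAM_10H   0x00100000
#define CPUID_XMOD       0x000f0000     /* extended model field */
#define CPUID_XMOD_REV_F 0x00040000

int is_c1e_candidate(unsigned int eax)  /* eax = CPUID leaf 1, EAX */
{
        switch (eax & CPUID_XFAM) {
        case CPUID_XFAM_K8:
                /* only revision F and later K8s are affected */
                return (eax & CPUID_XMOD) >= CPUID_XMOD_REV_F;
        case CPUID_XFAM_10H:
                return 1;
        }
        return 0;
}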
diff --git a/arch/x86/kernel/cpu/amd_64.c b/arch/x86/kernel/cpu/amd_64.c
new file mode 100644
index 000000000000..bd182b7616ee
--- /dev/null
+++ b/arch/x86/kernel/cpu/amd_64.c
@@ -0,0 +1,221 @@
1#include <linux/init.h>
2#include <linux/mm.h>
3
4#include <asm/numa_64.h>
5#include <asm/mmconfig.h>
6#include <asm/cacheflush.h>
7
8#include <mach_apic.h>
9
10#include "cpu.h"
11
12int force_mwait __cpuinitdata;
13
14#ifdef CONFIG_NUMA
15static int __cpuinit nearby_node(int apicid)
16{
17 int i, node;
18
19 for (i = apicid - 1; i >= 0; i--) {
20 node = apicid_to_node[i];
21 if (node != NUMA_NO_NODE && node_online(node))
22 return node;
23 }
24 for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) {
25 node = apicid_to_node[i];
26 if (node != NUMA_NO_NODE && node_online(node))
27 return node;
28 }
29 return first_node(node_online_map); /* Shouldn't happen */
30}
31#endif
32
33/*
34 * On an AMD dual-core setup the lower bits of the APIC id distinguish the cores.
35 * Assumes number of cores is a power of two.
36 */
37static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c)
38{
39#ifdef CONFIG_SMP
40 unsigned bits;
41#ifdef CONFIG_NUMA
42 int cpu = smp_processor_id();
43 int node = 0;
44 unsigned apicid = hard_smp_processor_id();
45#endif
46 bits = c->x86_coreid_bits;
47
48 /* Low order bits define the core id (index of core in socket) */
49 c->cpu_core_id = c->initial_apicid & ((1 << bits)-1);
50 /* Convert the initial APIC ID into the socket ID */
51 c->phys_proc_id = c->initial_apicid >> bits;
52
53#ifdef CONFIG_NUMA
54 node = c->phys_proc_id;
55 if (apicid_to_node[apicid] != NUMA_NO_NODE)
56 node = apicid_to_node[apicid];
57 if (!node_online(node)) {
58 /* Two possibilities here:
59 - The CPU is missing memory and no node was created.
60 In that case try picking one from a nearby CPU
61 - The APIC IDs differ from the HyperTransport node IDs
62 which the K8 northbridge parsing fills in.
63 Assume they are all increased by a constant offset,
64 but in the same order as the HT nodeids.
65 If that doesn't result in a usable node fall back to the
66 path for the previous case. */
67
68 int ht_nodeid = c->initial_apicid;
69
70 if (ht_nodeid >= 0 &&
71 apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
72 node = apicid_to_node[ht_nodeid];
73 /* Pick a nearby node */
74 if (!node_online(node))
75 node = nearby_node(apicid);
76 }
77 numa_set_node(cpu, node);
78
79 printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node);
80#endif
81#endif
82}
83
84static void __cpuinit early_init_amd_mc(struct cpuinfo_x86 *c)
85{
86#ifdef CONFIG_SMP
87 unsigned bits, ecx;
88
89 /* Multi core CPU? */
90 if (c->extended_cpuid_level < 0x80000008)
91 return;
92
93 ecx = cpuid_ecx(0x80000008);
94
95 c->x86_max_cores = (ecx & 0xff) + 1;
96
97 /* CPU telling us the core id bits shift? */
98 bits = (ecx >> 12) & 0xF;
99
100 /* Otherwise recompute */
101 if (bits == 0) {
102 while ((1 << bits) < c->x86_max_cores)
103 bits++;
104 }
105
106 c->x86_coreid_bits = bits;
107
108#endif
109}
110
111static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
112{
113 early_init_amd_mc(c);
114
115 /* c->x86_power is 8000_0007 edx. Bit 8 is constant TSC */
116 if (c->x86_power & (1<<8))
117 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
118}
119
120static void __cpuinit init_amd(struct cpuinfo_x86 *c)
121{
122 unsigned level;
123
124#ifdef CONFIG_SMP
125 unsigned long value;
126
127 /*
128 * Disable TLB flush filter by setting HWCR.FFDIS on K8
129 * bit 6 of msr C001_0015
130 *
131 * Errata 63 for SH-B3 steppings
132 * Errata 122 for all steppings (F+ have it disabled by default)
133 */
134 if (c->x86 == 0xf) {
135 rdmsrl(MSR_K8_HWCR, value);
136 value |= 1 << 6;
137 wrmsrl(MSR_K8_HWCR, value);
138 }
139#endif
140
141 /* Bit 31 in normal CPUID is used for a nonstandard 3DNow! ID;
142 3DNow! is identified by bit 31 in extended CPUID (1*32+31) anyway */
143 clear_cpu_cap(c, 0*32+31);
144
145 /* On C+ stepping K8 rep microcode works well for copy/memset */
146 if (c->x86 == 0xf) {
147 level = cpuid_eax(1);
148 if ((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58)
149 set_cpu_cap(c, X86_FEATURE_REP_GOOD);
150 }
151 if (c->x86 == 0x10 || c->x86 == 0x11)
152 set_cpu_cap(c, X86_FEATURE_REP_GOOD);
153
154 /* Enable workaround for FXSAVE leak */
155 if (c->x86 >= 6)
156 set_cpu_cap(c, X86_FEATURE_FXSAVE_LEAK);
157
158 level = get_model_name(c);
159 if (!level) {
160 switch (c->x86) {
161 case 0xf:
162 /* Should distinguish models here, but this is only
163 a fallback anyway. */
164 strcpy(c->x86_model_id, "Hammer");
165 break;
166 }
167 }
168 display_cacheinfo(c);
169
170 /* Multi core CPU? */
171 if (c->extended_cpuid_level >= 0x80000008)
172 amd_detect_cmp(c);
173
174 if (c->extended_cpuid_level >= 0x80000006 &&
175 (cpuid_edx(0x80000006) & 0xf000))
176 num_cache_leaves = 4;
177 else
178 num_cache_leaves = 3;
179
180 if (c->x86 >= 0xf && c->x86 <= 0x11)
181 set_cpu_cap(c, X86_FEATURE_K8);
182
183 /* MFENCE stops RDTSC speculation */
184 set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
185
186 if (c->x86 == 0x10) {
187 /* do this for boot cpu */
188 if (c == &boot_cpu_data)
189 check_enable_amd_mmconf_dmi();
190
191 fam10h_check_enable_mmcfg();
192 }
193
194 if (c == &boot_cpu_data && c->x86 >= 0xf && c->x86 <= 0x11) {
195 unsigned long long tseg;
196
197 /*
198 * Split up direct mapping around the TSEG SMM area.
199 * Don't do it for gbpages because there seems very little
200 * benefit in doing so.
201 */
202 if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) {
203 if ((tseg>>PMD_SHIFT) <
204 (max_low_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) ||
205 ((tseg>>PMD_SHIFT) <
206 (max_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) &&
207 (tseg>>PMD_SHIFT) >= (1ULL<<(32 - PMD_SHIFT))))
208 set_memory_4k((unsigned long)__va(tseg), 1);
209 }
210 }
211}
212
213static struct cpu_dev amd_cpu_dev __cpuinitdata = {
214 .c_vendor = "AMD",
215 .c_ident = { "AuthenticAMD" },
216 .c_early_init = early_init_amd,
217 .c_init = init_amd,
218};
219
220cpu_vendor_dev_register(X86_VENDOR_AMD, &amd_cpu_dev);
221
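amd_detect_cmp() above splits the initial APIC ID into core and socket using x86_coreid_bits. A worked standalone example of that decomposition, with hypothetical values: four cores per socket means two core-id bits, so APIC ID 13 (0b1101) decodes to core 1 in socket 3.

#include <stdio.h>

int main(void)
{
        unsigned apicid = 13;   /* hypothetical initial APIC ID */
        unsigned bits = 2;      /* log2(cores per socket) */

        unsigned core_id = apicid & ((1 << bits) - 1);  /* low bits */
        unsigned socket_id = apicid >> bits;            /* high bits */

        printf("APIC %u -> socket %u, core %u\n", apicid, socket_id, core_id);
        return 0;               /* prints: APIC 13 -> socket 3, core 1 */
}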
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 170d2f5523b2..1b1c56bb338f 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -59,8 +59,12 @@ static void __init check_fpu(void)
59 return; 59 return;
60 } 60 }
61 61
62/* trap_init() enabled FXSR and company _before_ testing for FP problems here. */ 62 /*
63 /* Test for the divl bug.. */ 63 * trap_init() enabled FXSR and company _before_ testing for FP
64 * problems here.
65 *
66 * Test for the divl bug..
67 */
64 __asm__("fninit\n\t" 68 __asm__("fninit\n\t"
65 "fldl %1\n\t" 69 "fldl %1\n\t"
66 "fdivl %2\n\t" 70 "fdivl %2\n\t"
@@ -108,10 +112,15 @@ static void __init check_popad(void)
108 "movl $12345678,%%eax; movl $0,%%edi; pusha; popa; movl (%%edx,%%edi),%%ecx " 112 "movl $12345678,%%eax; movl $0,%%edi; pusha; popa; movl (%%edx,%%edi),%%ecx "
109 : "=&a" (res) 113 : "=&a" (res)
110 : "d" (inp) 114 : "d" (inp)
111 : "ecx", "edi" ); 115 : "ecx", "edi");
112 /* If this fails, it means that any user program may lock the CPU hard. Too bad. */ 116 /*
113 if (res != 12345678) printk( "Buggy.\n" ); 117 * If this fails, it means that any user program may lock the
114 else printk( "OK.\n" ); 118 * CPU hard. Too bad.
119 */
120 if (res != 12345678)
121 printk("Buggy.\n");
122 else
123 printk("OK.\n");
115#endif 124#endif
116} 125}
117 126
@@ -137,7 +146,8 @@ static void __init check_config(void)
137 * i486+ only features! (WP works in supervisor mode and the 146 * i486+ only features! (WP works in supervisor mode and the
138 * new "invlpg" and "bswap" instructions) 147 * new "invlpg" and "bswap" instructions)
139 */ 148 */
140#if defined(CONFIG_X86_WP_WORKS_OK) || defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_BSWAP) 149#if defined(CONFIG_X86_WP_WORKS_OK) || defined(CONFIG_X86_INVLPG) || \
150 defined(CONFIG_X86_BSWAP)
141 if (boot_cpu_data.x86 == 3) 151 if (boot_cpu_data.x86 == 3)
142 panic("Kernel requires i486+ for 'invlpg' and other features"); 152 panic("Kernel requires i486+ for 'invlpg' and other features");
143#endif 153#endif
@@ -170,6 +180,7 @@ void __init check_bugs(void)
170 check_fpu(); 180 check_fpu();
171 check_hlt(); 181 check_hlt();
172 check_popad(); 182 check_popad();
173 init_utsname()->machine[1] = '0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86); 183 init_utsname()->machine[1] =
184 '0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86);
174 alternative_instructions(); 185 alternative_instructions();
175} 186}
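The reflowed utsname line at the end of check_bugs() encodes the CPU family into the machine string, clamped at 6 so anything newer still reports as i686. A tiny standalone illustration of that computation, with a hypothetical family value:

#include <stdio.h>

int main(void)
{
        char machine[] = "ix86";
        int family = 15;        /* e.g. a Pentium 4 */

        machine[1] = '0' + (family > 6 ? 6 : family);
        printf("%s\n", machine);        /* prints: i686 */
        return 0;
}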
diff --git a/arch/x86/kernel/bugs_64.c b/arch/x86/kernel/cpu/bugs_64.c
index 9a3ed0649d4e..9a3ed0649d4e 100644
--- a/arch/x86/kernel/bugs_64.c
+++ b/arch/x86/kernel/cpu/bugs_64.c
diff --git a/arch/x86/kernel/cpu/centaur_64.c b/arch/x86/kernel/cpu/centaur_64.c
new file mode 100644
index 000000000000..2026d2119cdb
--- /dev/null
+++ b/arch/x86/kernel/cpu/centaur_64.c
@@ -0,0 +1,45 @@
1#include <linux/init.h>
2#include <linux/smp.h>
3
4#include <asm/cpufeature.h>
5#include <asm/processor.h>
6
7#include "cpu.h"
8
9static void __cpuinit early_init_centaur(struct cpuinfo_x86 *c)
10{
11 if (c->x86 == 0x6 && c->x86_model >= 0xf)
12 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
13
14 set_cpu_cap(c, X86_FEATURE_SYSENTER32);
15}
16
17static void __cpuinit init_centaur(struct cpuinfo_x86 *c)
18{
19 /* Cache sizes */
20 unsigned n;
21
22 n = c->extended_cpuid_level;
23 if (n >= 0x80000008) {
24 unsigned eax = cpuid_eax(0x80000008);
25 c->x86_virt_bits = (eax >> 8) & 0xff;
26 c->x86_phys_bits = eax & 0xff;
27 }
28
29 if (c->x86 == 0x6 && c->x86_model >= 0xf) {
30 c->x86_cache_alignment = c->x86_clflush_size * 2;
31 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
32 set_cpu_cap(c, X86_FEATURE_REP_GOOD);
33 }
34 set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
35}
36
37static struct cpu_dev centaur_cpu_dev __cpuinitdata = {
38 .c_vendor = "Centaur",
39 .c_ident = { "CentaurHauls" },
40 .c_early_init = early_init_centaur,
41 .c_init = init_centaur,
42};
43
44cpu_vendor_dev_register(X86_VENDOR_CENTAUR, &centaur_cpu_dev);
45
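init_centaur() pulls the address widths from CPUID leaf 0x80000008, a decode the new amd_64.c and intel_64.c files share: EAX[7:0] is the physical address width, EAX[15:8] the virtual. A standalone sketch, with a hypothetical EAX value typical of a 40/48-bit part:

#include <stdio.h>

int main(void)
{
        unsigned eax = 0x3028;  /* hypothetical CPUID.80000008:EAX */

        unsigned phys_bits = eax & 0xff;        /* 0x28 = 40 */
        unsigned virt_bits = (eax >> 8) & 0xff; /* 0x30 = 48 */

        printf("phys %u bits, virt %u bits\n", phys_bits, virt_bits);
        return 0;
}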
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index d0463a946247..80ab20d4fa39 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -427,7 +427,7 @@ __setup("serialnumber", x86_serial_nr_setup);
427/* 427/*
428 * This does the hard work of actually picking apart the CPU stuff... 428 * This does the hard work of actually picking apart the CPU stuff...
429 */ 429 */
430void __cpuinit identify_cpu(struct cpuinfo_x86 *c) 430static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
431{ 431{
432 int i; 432 int i;
433 433
diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c
new file mode 100644
index 000000000000..36537ab9e56a
--- /dev/null
+++ b/arch/x86/kernel/cpu/common_64.c
@@ -0,0 +1,679 @@
1#include <linux/init.h>
2#include <linux/kernel.h>
3#include <linux/sched.h>
4#include <linux/string.h>
5#include <linux/bootmem.h>
6#include <linux/bitops.h>
7#include <linux/module.h>
8#include <linux/kgdb.h>
9#include <linux/topology.h>
10#include <linux/string.h>
11#include <linux/delay.h>
12#include <linux/smp.h>
13#include <linux/module.h>
14#include <linux/percpu.h>
15#include <asm/processor.h>
16#include <asm/i387.h>
17#include <asm/msr.h>
18#include <asm/io.h>
19#include <asm/mmu_context.h>
20#include <asm/mtrr.h>
21#include <asm/mce.h>
22#include <asm/pat.h>
23#include <asm/numa.h>
24#ifdef CONFIG_X86_LOCAL_APIC
25#include <asm/mpspec.h>
26#include <asm/apic.h>
27#include <mach_apic.h>
28#endif
29#include <asm/pda.h>
30#include <asm/pgtable.h>
31#include <asm/processor.h>
32#include <asm/desc.h>
33#include <asm/atomic.h>
34#include <asm/proto.h>
35#include <asm/sections.h>
36#include <asm/setup.h>
37#include <asm/genapic.h>
38
39#include "cpu.h"
40
41/* We need valid kernel segments for data and code in long mode too;
42 * IRET will check the segment types (kkeil 2000/10/28).
43 * Also, SYSRET mandates a special GDT layout.
44 */
45/* The TLS descriptors are currently at a different place compared to i386.
46 Hopefully nobody expects them at a fixed place (Wine?) */
47DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = {
48 [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } },
49 [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } },
50 [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } },
51 [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } },
52 [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } },
53 [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } },
54} };
55EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
56
57__u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata;
58
59/* Current gdt points %fs at the "master" per-cpu area: after this,
60 * it's on the real one. */
61void switch_to_new_gdt(void)
62{
63 struct desc_ptr gdt_descr;
64
65 gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id());
66 gdt_descr.size = GDT_SIZE - 1;
67 load_gdt(&gdt_descr);
68}
69
70struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {};
71
72static void __cpuinit default_init(struct cpuinfo_x86 *c)
73{
74 display_cacheinfo(c);
75}
76
77static struct cpu_dev __cpuinitdata default_cpu = {
78 .c_init = default_init,
79 .c_vendor = "Unknown",
80};
81static struct cpu_dev *this_cpu __cpuinitdata = &default_cpu;
82
83int __cpuinit get_model_name(struct cpuinfo_x86 *c)
84{
85 unsigned int *v;
86
87 if (c->extended_cpuid_level < 0x80000004)
88 return 0;
89
90 v = (unsigned int *) c->x86_model_id;
91 cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]);
92 cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]);
93 cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]);
94 c->x86_model_id[48] = 0;
95 return 1;
96}
97
98
99void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
100{
101 unsigned int n, dummy, eax, ebx, ecx, edx;
102
103 n = c->extended_cpuid_level;
104
105 if (n >= 0x80000005) {
106 cpuid(0x80000005, &dummy, &ebx, &ecx, &edx);
107 printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), "
108 "D cache %dK (%d bytes/line)\n",
109 edx>>24, edx&0xFF, ecx>>24, ecx&0xFF);
110 c->x86_cache_size = (ecx>>24) + (edx>>24);
111 /* On K8 L1 TLB is inclusive, so don't count it */
112 c->x86_tlbsize = 0;
113 }
114
115 if (n >= 0x80000006) {
116 cpuid(0x80000006, &dummy, &ebx, &ecx, &edx);
117 ecx = cpuid_ecx(0x80000006);
118 c->x86_cache_size = ecx >> 16;
119 c->x86_tlbsize += ((ebx >> 16) & 0xfff) + (ebx & 0xfff);
120
121 printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n",
122 c->x86_cache_size, ecx & 0xFF);
123 }
124 if (n >= 0x80000008) {
125 cpuid(0x80000008, &eax, &dummy, &dummy, &dummy);
126 c->x86_virt_bits = (eax >> 8) & 0xff;
127 c->x86_phys_bits = eax & 0xff;
128 }
129}
130
131void __cpuinit detect_ht(struct cpuinfo_x86 *c)
132{
133#ifdef CONFIG_SMP
134 u32 eax, ebx, ecx, edx;
135 int index_msb, core_bits;
136
137 cpuid(1, &eax, &ebx, &ecx, &edx);
138
139
140 if (!cpu_has(c, X86_FEATURE_HT))
141 return;
142 if (cpu_has(c, X86_FEATURE_CMP_LEGACY))
143 goto out;
144
145 smp_num_siblings = (ebx & 0xff0000) >> 16;
146
147 if (smp_num_siblings == 1) {
148 printk(KERN_INFO "CPU: Hyper-Threading is disabled\n");
149 } else if (smp_num_siblings > 1) {
150
151 if (smp_num_siblings > NR_CPUS) {
152 printk(KERN_WARNING "CPU: Unsupported number of "
153 "siblings %d", smp_num_siblings);
154 smp_num_siblings = 1;
155 return;
156 }
157
158 index_msb = get_count_order(smp_num_siblings);
159 c->phys_proc_id = phys_pkg_id(index_msb);
160
161 smp_num_siblings = smp_num_siblings / c->x86_max_cores;
162
163 index_msb = get_count_order(smp_num_siblings);
164
165 core_bits = get_count_order(c->x86_max_cores);
166
167 c->cpu_core_id = phys_pkg_id(index_msb) &
168 ((1 << core_bits) - 1);
169 }
170out:
171 if ((c->x86_max_cores * smp_num_siblings) > 1) {
172 printk(KERN_INFO "CPU: Physical Processor ID: %d\n",
173 c->phys_proc_id);
174 printk(KERN_INFO "CPU: Processor Core ID: %d\n",
175 c->cpu_core_id);
176 }
177
178#endif
179}
180
181static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c)
182{
183 char *v = c->x86_vendor_id;
184 int i;
185 static int printed;
186
187 for (i = 0; i < X86_VENDOR_NUM; i++) {
188 if (cpu_devs[i]) {
189 if (!strcmp(v, cpu_devs[i]->c_ident[0]) ||
190 (cpu_devs[i]->c_ident[1] &&
191 !strcmp(v, cpu_devs[i]->c_ident[1]))) {
192 c->x86_vendor = i;
193 this_cpu = cpu_devs[i];
194 return;
195 }
196 }
197 }
198 if (!printed) {
199 printed++;
200 printk(KERN_ERR "CPU: Vendor unknown, using generic init.\n");
201 printk(KERN_ERR "CPU: Your system may be unstable.\n");
202 }
203 c->x86_vendor = X86_VENDOR_UNKNOWN;
204}
205
206static void __init early_cpu_support_print(void)
207{
208 int i, j;
209 struct cpu_dev *cpu_devx;
210
211 printk("KERNEL supported cpus:\n");
212 for (i = 0; i < X86_VENDOR_NUM; i++) {
213 cpu_devx = cpu_devs[i];
214 if (!cpu_devx)
215 continue;
216 for (j = 0; j < 2; j++) {
217 if (!cpu_devx->c_ident[j])
218 continue;
219 printk(" %s %s\n", cpu_devx->c_vendor,
220 cpu_devx->c_ident[j]);
221 }
222 }
223}
224
225static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c);
226
227void __init early_cpu_init(void)
228{
229 struct cpu_vendor_dev *cvdev;
230
231 for (cvdev = __x86cpuvendor_start;
232 cvdev < __x86cpuvendor_end;
233 cvdev++)
234 cpu_devs[cvdev->vendor] = cvdev->cpu_dev;
235 early_cpu_support_print();
236 early_identify_cpu(&boot_cpu_data);
237}
238
239/* Do some early cpuid on the boot CPU to get some parameters that are
240 needed before check_bugs. Everything advanced is in identify_cpu
241 below. */
242static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c)
243{
244 u32 tfms, xlvl;
245
246 c->loops_per_jiffy = loops_per_jiffy;
247 c->x86_cache_size = -1;
248 c->x86_vendor = X86_VENDOR_UNKNOWN;
249 c->x86_model = c->x86_mask = 0; /* So far unknown... */
250 c->x86_vendor_id[0] = '\0'; /* Unset */
251 c->x86_model_id[0] = '\0'; /* Unset */
252 c->x86_clflush_size = 64;
253 c->x86_cache_alignment = c->x86_clflush_size;
254 c->x86_max_cores = 1;
255 c->x86_coreid_bits = 0;
256 c->extended_cpuid_level = 0;
257 memset(&c->x86_capability, 0, sizeof c->x86_capability);
258
259 /* Get vendor name */
260 cpuid(0x00000000, (unsigned int *)&c->cpuid_level,
261 (unsigned int *)&c->x86_vendor_id[0],
262 (unsigned int *)&c->x86_vendor_id[8],
263 (unsigned int *)&c->x86_vendor_id[4]);
264
265 get_cpu_vendor(c);
266
267 /* Initialize the standard set of capabilities */
268 /* Note that the vendor-specific code below might override */
269
270 /* Intel-defined flags: level 0x00000001 */
271 if (c->cpuid_level >= 0x00000001) {
272 __u32 misc;
273 cpuid(0x00000001, &tfms, &misc, &c->x86_capability[4],
274 &c->x86_capability[0]);
275 c->x86 = (tfms >> 8) & 0xf;
276 c->x86_model = (tfms >> 4) & 0xf;
277 c->x86_mask = tfms & 0xf;
278 if (c->x86 == 0xf)
279 c->x86 += (tfms >> 20) & 0xff;
280 if (c->x86 >= 0x6)
281 c->x86_model += ((tfms >> 16) & 0xF) << 4;
282 if (test_cpu_cap(c, X86_FEATURE_CLFLSH))
283 c->x86_clflush_size = ((misc >> 8) & 0xff) * 8;
284 } else {
285 /* Have CPUID level 0 only - unheard of */
286 c->x86 = 4;
287 }
288
289 c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xff;
290#ifdef CONFIG_SMP
291 c->phys_proc_id = c->initial_apicid;
292#endif
293 /* AMD-defined flags: level 0x80000001 */
294 xlvl = cpuid_eax(0x80000000);
295 c->extended_cpuid_level = xlvl;
296 if ((xlvl & 0xffff0000) == 0x80000000) {
297 if (xlvl >= 0x80000001) {
298 c->x86_capability[1] = cpuid_edx(0x80000001);
299 c->x86_capability[6] = cpuid_ecx(0x80000001);
300 }
301 if (xlvl >= 0x80000004)
302 get_model_name(c); /* Default name */
303 }
304
305 /* Transmeta-defined flags: level 0x80860001 */
306 xlvl = cpuid_eax(0x80860000);
307 if ((xlvl & 0xffff0000) == 0x80860000) {
308 /* Don't set x86_cpuid_level here for now, to avoid confusion. */
309 if (xlvl >= 0x80860001)
310 c->x86_capability[2] = cpuid_edx(0x80860001);
311 }
312
313 c->extended_cpuid_level = cpuid_eax(0x80000000);
314 if (c->extended_cpuid_level >= 0x80000007)
315 c->x86_power = cpuid_edx(0x80000007);
316
317 /* Assume all 64-bit CPUs support 32-bit syscall */
318 set_cpu_cap(c, X86_FEATURE_SYSCALL32);
319
320 if (c->x86_vendor != X86_VENDOR_UNKNOWN &&
321 cpu_devs[c->x86_vendor]->c_early_init)
322 cpu_devs[c->x86_vendor]->c_early_init(c);
323
324 validate_pat_support(c);
325
326 /* early_param could have cleared it, but CPUID set it again, so clear it here */
327 if (disable_apic)
328 clear_cpu_cap(c, X86_FEATURE_APIC);
329}
330
331/*
332 * This does the hard work of actually picking apart the CPU stuff...
333 */
334static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
335{
336 int i;
337
338 early_identify_cpu(c);
339
340 init_scattered_cpuid_features(c);
341
342 c->apicid = phys_pkg_id(0);
343
344 /*
345 * Vendor-specific initialization. In this section we
346 * canonicalize the feature flags, meaning if there are
347 * features a certain CPU supports which CPUID doesn't
348 * tell us, CPUID claiming incorrect flags, or other bugs,
349 * we handle them here.
350 *
351 * At the end of this section, c->x86_capability better
352 * indicate the features this CPU genuinely supports!
353 */
354 if (this_cpu->c_init)
355 this_cpu->c_init(c);
356
357 detect_ht(c);
358
359 /*
360 * On SMP, boot_cpu_data holds the common feature set between
361 * all CPUs; so make sure that we indicate which features are
362 * common between the CPUs. The first time this routine gets
363 * executed, c == &boot_cpu_data.
364 */
365 if (c != &boot_cpu_data) {
366 /* AND the already accumulated flags with these */
367 for (i = 0; i < NCAPINTS; i++)
368 boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
369 }
370
371 /* Clear all flags overridden by options */
372 for (i = 0; i < NCAPINTS; i++)
373 c->x86_capability[i] &= ~cleared_cpu_caps[i];
374
375#ifdef CONFIG_X86_MCE
376 mcheck_init(c);
377#endif
378 select_idle_routine(c);
379
380#ifdef CONFIG_NUMA
381 numa_add_cpu(smp_processor_id());
382#endif
383
384}
385
386void __cpuinit identify_boot_cpu(void)
387{
388 identify_cpu(&boot_cpu_data);
389}
390
391void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
392{
393 BUG_ON(c == &boot_cpu_data);
394 identify_cpu(c);
395 mtrr_ap_init();
396}
397
398static __init int setup_noclflush(char *arg)
399{
400 setup_clear_cpu_cap(X86_FEATURE_CLFLSH);
401 return 1;
402}
403__setup("noclflush", setup_noclflush);
404
405void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
406{
407 if (c->x86_model_id[0])
408 printk(KERN_CONT "%s", c->x86_model_id);
409
410 if (c->x86_mask || c->cpuid_level >= 0)
411 printk(KERN_CONT " stepping %02x\n", c->x86_mask);
412 else
413 printk(KERN_CONT "\n");
414}
415
416static __init int setup_disablecpuid(char *arg)
417{
418 int bit;
419 if (get_option(&arg, &bit) && bit < NCAPINTS*32)
420 setup_clear_cpu_cap(bit);
421 else
422 return 0;
423 return 1;
424}
425__setup("clearcpuid=", setup_disablecpuid);
426
427cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;
428
429struct x8664_pda **_cpu_pda __read_mostly;
430EXPORT_SYMBOL(_cpu_pda);
431
432struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
433
434char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss;
435
436unsigned long __supported_pte_mask __read_mostly = ~0UL;
437EXPORT_SYMBOL_GPL(__supported_pte_mask);
438
439static int do_not_nx __cpuinitdata;
440
441/* noexec=on|off
442Control non-executable mappings for 64-bit processes.
443
444on Enable (default)
445off Disable
446*/
447static int __init nonx_setup(char *str)
448{
449 if (!str)
450 return -EINVAL;
451 if (!strncmp(str, "on", 2)) {
452 __supported_pte_mask |= _PAGE_NX;
453 do_not_nx = 0;
454 } else if (!strncmp(str, "off", 3)) {
455 do_not_nx = 1;
456 __supported_pte_mask &= ~_PAGE_NX;
457 }
458 return 0;
459}
460early_param("noexec", nonx_setup);
461
462int force_personality32;
463
464/* noexec32=on|off
465Control non-executable heap for 32-bit processes.
466To control the stack too, use noexec=off.
467
468on PROT_READ does not imply PROT_EXEC for 32-bit processes (default)
469off PROT_READ implies PROT_EXEC
470*/
471static int __init nonx32_setup(char *str)
472{
473 if (!strcmp(str, "on"))
474 force_personality32 &= ~READ_IMPLIES_EXEC;
475 else if (!strcmp(str, "off"))
476 force_personality32 |= READ_IMPLIES_EXEC;
477 return 1;
478}
479__setup("noexec32=", nonx32_setup);
480
481void pda_init(int cpu)
482{
483 struct x8664_pda *pda = cpu_pda(cpu);
484
485 /* Set up data that may be needed in __get_free_pages early */
486 loadsegment(fs, 0);
487 loadsegment(gs, 0);
488 /* Memory clobbers used to order PDA accesses */
489 mb();
490 wrmsrl(MSR_GS_BASE, pda);
491 mb();
492
493 pda->cpunumber = cpu;
494 pda->irqcount = -1;
495 pda->kernelstack = (unsigned long)stack_thread_info() -
496 PDA_STACKOFFSET + THREAD_SIZE;
497 pda->active_mm = &init_mm;
498 pda->mmu_state = 0;
499
500 if (cpu == 0) {
501 /* others are initialized in smpboot.c */
502 pda->pcurrent = &init_task;
503 pda->irqstackptr = boot_cpu_stack;
504 } else {
505 pda->irqstackptr = (char *)
506 __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER);
507 if (!pda->irqstackptr)
508 panic("cannot allocate irqstack for cpu %d", cpu);
509
510 if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE)
511 pda->nodenumber = cpu_to_node(cpu);
512 }
513
514 pda->irqstackptr += IRQSTACKSIZE-64;
515}
516
517char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ +
518 DEBUG_STKSZ]
519__attribute__((section(".bss.page_aligned")));
520
521extern asmlinkage void ignore_sysret(void);
522
523/* May not be marked __init: used by software suspend */
524void syscall_init(void)
525{
526 /*
527 * LSTAR and STAR live in a somewhat strange symbiosis.
528 * They both write to the same internal register. STAR allows
529 * setting CS/DS, but only a 32-bit target; LSTAR sets the 64-bit rip.
530 */
531 wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32);
532 wrmsrl(MSR_LSTAR, system_call);
533 wrmsrl(MSR_CSTAR, ignore_sysret);
534
535#ifdef CONFIG_IA32_EMULATION
536 syscall32_cpu_init();
537#endif
538
539 /* Flags to clear on syscall */
540 wrmsrl(MSR_SYSCALL_MASK,
541 X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|X86_EFLAGS_IOPL);
542}
543
544void __cpuinit check_efer(void)
545{
546 unsigned long efer;
547
548 rdmsrl(MSR_EFER, efer);
549 if (!(efer & EFER_NX) || do_not_nx)
550 __supported_pte_mask &= ~_PAGE_NX;
551}
552
553unsigned long kernel_eflags;
554
555/*
556 * Copies of the original ist values from the tss are only accessed during
557 * debugging, no special alignment required.
558 */
559DEFINE_PER_CPU(struct orig_ist, orig_ist);
560
561/*
562 * cpu_init() initializes state that is per-CPU. Some data is already
563 * initialized (naturally) in the bootstrap process, such as the GDT
564 * and IDT. We reload them nevertheless, this function acts as a
565 * 'CPU state barrier', nothing should get across.
566 * A lot of state is already set up in PDA init.
567 */
568void __cpuinit cpu_init(void)
569{
570 int cpu = stack_smp_processor_id();
571 struct tss_struct *t = &per_cpu(init_tss, cpu);
572 struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu);
573 unsigned long v;
574 char *estacks = NULL;
575 struct task_struct *me;
576 int i;
577
578 /* CPU 0 is initialised in head64.c */
579 if (cpu != 0)
580 pda_init(cpu);
581 else
582 estacks = boot_exception_stacks;
583
584 me = current;
585
586 if (cpu_test_and_set(cpu, cpu_initialized))
587 panic("CPU#%d already initialized!\n", cpu);
588
589 printk(KERN_INFO "Initializing CPU#%d\n", cpu);
590
591 clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
592
593 /*
594 * Initialize the per-CPU GDT with the boot GDT,
595 * and set up the GDT descriptor:
596 */
597
598 switch_to_new_gdt();
599 load_idt((const struct desc_ptr *)&idt_descr);
600
601 memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8);
602 syscall_init();
603
604 wrmsrl(MSR_FS_BASE, 0);
605 wrmsrl(MSR_KERNEL_GS_BASE, 0);
606 barrier();
607
608 check_efer();
609
610 /*
611 * set up and load the per-CPU TSS
612 */
613 for (v = 0; v < N_EXCEPTION_STACKS; v++) {
614 static const unsigned int order[N_EXCEPTION_STACKS] = {
615 [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER,
616 [DEBUG_STACK - 1] = DEBUG_STACK_ORDER
617 };
618 if (cpu) {
619 estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]);
620 if (!estacks)
621 panic("Cannot allocate exception stack %ld %d\n",
622 v, cpu);
623 }
624 estacks += PAGE_SIZE << order[v];
625 orig_ist->ist[v] = t->x86_tss.ist[v] = (unsigned long)estacks;
626 }
627
628 t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
629 /*
630 * <= is required because the CPU will access up to
631 * 8 bits beyond the end of the IO permission bitmap.
632 */
633 for (i = 0; i <= IO_BITMAP_LONGS; i++)
634 t->io_bitmap[i] = ~0UL;
635
636 atomic_inc(&init_mm.mm_count);
637 me->active_mm = &init_mm;
638 if (me->mm)
639 BUG();
640 enter_lazy_tlb(&init_mm, me);
641
642 load_sp0(t, &current->thread);
643 set_tss_desc(cpu, t);
644 load_TR_desc();
645 load_LDT(&init_mm.context);
646
647#ifdef CONFIG_KGDB
648 /*
649 * If the kgdb is connected no debug regs should be altered. This
650 * is only applicable when KGDB and a KGDB I/O module are built
651 * into the kernel and you are using early debugging with
652 * kgdbwait. KGDB will control the kernel HW breakpoint registers.
653 */
654 if (kgdb_connected && arch_kgdb_ops.correct_hw_break)
655 arch_kgdb_ops.correct_hw_break();
656 else {
657#endif
658 /*
659 * Clear all 6 debug registers:
660 */
661
662 set_debugreg(0UL, 0);
663 set_debugreg(0UL, 1);
664 set_debugreg(0UL, 2);
665 set_debugreg(0UL, 3);
666 set_debugreg(0UL, 6);
667 set_debugreg(0UL, 7);
668#ifdef CONFIG_KGDB
669 /* If the kgdb is connected no debug regs should be altered. */
670 }
671#endif
672
673 fpu_init();
674
675 raw_local_save_flags(kernel_eflags);
676
677 if (is_uv_system())
678 uv_cpu_init();
679}
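early_identify_cpu() above derives family, model, and stepping from CPUID.1:EAX: the base fields live in bits 11:8 and 7:4, with extended family folded in for family 0xf and extended model for family 6 and up. A worked standalone example; the tfms value is hypothetical.

#include <stdio.h>

int main(void)
{
        unsigned tfms = 0x00100f42;     /* e.g. an AMD family-0x10 part */

        unsigned family = (tfms >> 8) & 0xf;
        unsigned model  = (tfms >> 4) & 0xf;
        unsigned step   = tfms & 0xf;

        if (family == 0xf)
                family += (tfms >> 20) & 0xff;          /* extended family */
        if (family >= 0x6)
                model += ((tfms >> 16) & 0xf) << 4;     /* extended model */

        printf("family 0x%x model 0x%x stepping %u\n", family, model, step);
        return 0;       /* prints: family 0x10 model 0x4 stepping 2 */
}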
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
index 783691b2a738..4d894e8565fe 100644
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -1,3 +1,6 @@
1#ifndef ARCH_X86_CPU_H
2
3#define ARCH_X86_CPU_H
1 4
2struct cpu_model_info { 5struct cpu_model_info {
3 int vendor; 6 int vendor;
@@ -36,3 +39,5 @@ extern struct cpu_vendor_dev __x86cpuvendor_start[], __x86cpuvendor_end[];
36 39
37extern int get_model_name(struct cpuinfo_x86 *c); 40extern int get_model_name(struct cpuinfo_x86 *c);
38extern void display_cacheinfo(struct cpuinfo_x86 *c); 41extern void display_cacheinfo(struct cpuinfo_x86 *c);
42
43#endif
diff --git a/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c b/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c
index f03e9153618e..965ea52767ac 100644
--- a/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c
+++ b/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c
@@ -26,9 +26,10 @@
26#define NFORCE2_SAFE_DISTANCE 50 26#define NFORCE2_SAFE_DISTANCE 50
27 27
28/* Delay in ms between FSB changes */ 28/* Delay in ms between FSB changes */
29//#define NFORCE2_DELAY 10 29/* #define NFORCE2_DELAY 10 */
30 30
31/* nforce2_chipset: 31/*
32 * nforce2_chipset:
32 * FSB is changed using the chipset 33 * FSB is changed using the chipset
33 */ 34 */
34static struct pci_dev *nforce2_chipset_dev; 35static struct pci_dev *nforce2_chipset_dev;
@@ -36,13 +37,13 @@ static struct pci_dev *nforce2_chipset_dev;
36/* fid: 37/* fid:
37 * multiplier * 10 38 * multiplier * 10
38 */ 39 */
39static int fid = 0; 40static int fid;
40 41
41/* min_fsb, max_fsb: 42/* min_fsb, max_fsb:
42 * minimum and maximum FSB (= FSB at boot time) 43 * minimum and maximum FSB (= FSB at boot time)
43 */ 44 */
44static int min_fsb = 0; 45static int min_fsb;
45static int max_fsb = 0; 46static int max_fsb;
46 47
47MODULE_AUTHOR("Sebastian Witt <se.witt@gmx.net>"); 48MODULE_AUTHOR("Sebastian Witt <se.witt@gmx.net>");
48MODULE_DESCRIPTION("nForce2 FSB changing cpufreq driver"); 49MODULE_DESCRIPTION("nForce2 FSB changing cpufreq driver");
@@ -53,7 +54,7 @@ module_param(min_fsb, int, 0444);
53 54
54MODULE_PARM_DESC(fid, "CPU multiplier to use (11.5 = 115)"); 55MODULE_PARM_DESC(fid, "CPU multiplier to use (11.5 = 115)");
55MODULE_PARM_DESC(min_fsb, 56MODULE_PARM_DESC(min_fsb,
56 "Minimum FSB to use, if not defined: current FSB - 50"); 57 "Minimum FSB to use, if not defined: current FSB - 50");
57 58
58#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "cpufreq-nforce2", msg) 59#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "cpufreq-nforce2", msg)
59 60
@@ -139,7 +140,7 @@ static unsigned int nforce2_fsb_read(int bootfsb)
139 140
140 /* Get chipset boot FSB from subdevice 5 (FSB at boot-time) */ 141 /* Get chipset boot FSB from subdevice 5 (FSB at boot-time) */
141 nforce2_sub5 = pci_get_subsys(PCI_VENDOR_ID_NVIDIA, 142 nforce2_sub5 = pci_get_subsys(PCI_VENDOR_ID_NVIDIA,
142 0x01EF,PCI_ANY_ID,PCI_ANY_ID,NULL); 143 0x01EF, PCI_ANY_ID, PCI_ANY_ID, NULL);
143 if (!nforce2_sub5) 144 if (!nforce2_sub5)
144 return 0; 145 return 0;
145 146
@@ -147,13 +148,13 @@ static unsigned int nforce2_fsb_read(int bootfsb)
147 fsb /= 1000000; 148 fsb /= 1000000;
148 149
149 /* Check if PLL register is already set */ 150 /* Check if PLL register is already set */
150 pci_read_config_byte(nforce2_chipset_dev,NFORCE2_PLLENABLE, (u8 *)&temp); 151 pci_read_config_byte(nforce2_chipset_dev, NFORCE2_PLLENABLE, (u8 *)&temp);
151 152
152 if(bootfsb || !temp) 153 if (bootfsb || !temp)
153 return fsb; 154 return fsb;
154 155
155 /* Use PLL register FSB value */ 156 /* Use PLL register FSB value */
156 pci_read_config_dword(nforce2_chipset_dev,NFORCE2_PLLREG, &temp); 157 pci_read_config_dword(nforce2_chipset_dev, NFORCE2_PLLREG, &temp);
157 fsb = nforce2_calc_fsb(temp); 158 fsb = nforce2_calc_fsb(temp);
158 159
159 return fsb; 160 return fsb;
@@ -184,7 +185,7 @@ static int nforce2_set_fsb(unsigned int fsb)
184 } 185 }
185 186
186 /* First write? Then set actual value */ 187 /* First write? Then set actual value */
187 pci_read_config_byte(nforce2_chipset_dev,NFORCE2_PLLENABLE, (u8 *)&temp); 188 pci_read_config_byte(nforce2_chipset_dev, NFORCE2_PLLENABLE, (u8 *)&temp);
188 if (!temp) { 189 if (!temp) {
189 pll = nforce2_calc_pll(tfsb); 190 pll = nforce2_calc_pll(tfsb);
190 191
@@ -210,7 +211,8 @@ static int nforce2_set_fsb(unsigned int fsb)
210 tfsb--; 211 tfsb--;
211 212
212 /* Calculate the PLL reg. value */ 213 /* Calculate the PLL reg. value */
213 if ((pll = nforce2_calc_pll(tfsb)) == -1) 214 pll = nforce2_calc_pll(tfsb);
215 if (pll == -1)
214 return -EINVAL; 216 return -EINVAL;
215 217
216 nforce2_write_pll(pll); 218 nforce2_write_pll(pll);
@@ -249,7 +251,7 @@ static unsigned int nforce2_get(unsigned int cpu)
249static int nforce2_target(struct cpufreq_policy *policy, 251static int nforce2_target(struct cpufreq_policy *policy,
250 unsigned int target_freq, unsigned int relation) 252 unsigned int target_freq, unsigned int relation)
251{ 253{
252// unsigned long flags; 254/* unsigned long flags; */
253 struct cpufreq_freqs freqs; 255 struct cpufreq_freqs freqs;
254 unsigned int target_fsb; 256 unsigned int target_fsb;
255 257
@@ -271,17 +273,17 @@ static int nforce2_target(struct cpufreq_policy *policy,
271 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); 273 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
272 274
273 /* Disable IRQs */ 275 /* Disable IRQs */
274 //local_irq_save(flags); 276 /* local_irq_save(flags); */
275 277
276 if (nforce2_set_fsb(target_fsb) < 0) 278 if (nforce2_set_fsb(target_fsb) < 0)
277 printk(KERN_ERR "cpufreq: Changing FSB to %d failed\n", 279 printk(KERN_ERR "cpufreq: Changing FSB to %d failed\n",
278 target_fsb); 280 target_fsb);
279 else 281 else
280 dprintk("Changed FSB successfully to %d\n", 282 dprintk("Changed FSB successfully to %d\n",
281 target_fsb); 283 target_fsb);
282 284
283 /* Enable IRQs */ 285 /* Enable IRQs */
284 //local_irq_restore(flags); 286 /* local_irq_restore(flags); */
285 287
286 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); 288 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
287 289
@@ -302,8 +304,8 @@ static int nforce2_verify(struct cpufreq_policy *policy)
302 policy->max = (fsb_pol_max + 1) * fid * 100; 304 policy->max = (fsb_pol_max + 1) * fid * 100;
303 305
304 cpufreq_verify_within_limits(policy, 306 cpufreq_verify_within_limits(policy,
305 policy->cpuinfo.min_freq, 307 policy->cpuinfo.min_freq,
306 policy->cpuinfo.max_freq); 308 policy->cpuinfo.max_freq);
307 return 0; 309 return 0;
308} 310}
309 311
@@ -347,7 +349,7 @@ static int nforce2_cpu_init(struct cpufreq_policy *policy)
347 /* Set maximum FSB to FSB at boot time */ 349 /* Set maximum FSB to FSB at boot time */
348 max_fsb = nforce2_fsb_read(1); 350 max_fsb = nforce2_fsb_read(1);
349 351
350 if(!max_fsb) 352 if (!max_fsb)
351 return -EIO; 353 return -EIO;
352 354
353 if (!min_fsb) 355 if (!min_fsb)
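The driver stores the multiplier scaled by ten in fid, so the policy->max arithmetic in nforce2_verify() yields kHz directly from an FSB in MHz. A worked example of that arithmetic, with hypothetical values:

#include <stdio.h>

int main(void)
{
        int fsb = 133;          /* MHz, as read from the chipset */
        int fid = 115;          /* 11.5x multiplier, stored as 115 */

        unsigned freq_khz = fsb * fid * 100;

        printf("%d MHz FSB x %d.%d -> %u kHz\n",
               fsb, fid / 10, fid % 10, freq_khz);
        return 0;       /* prints: 133 MHz FSB x 11.5 -> 1529500 kHz */
}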
diff --git a/arch/x86/kernel/cpu/intel_64.c b/arch/x86/kernel/cpu/intel_64.c
new file mode 100644
index 000000000000..02f773399e39
--- /dev/null
+++ b/arch/x86/kernel/cpu/intel_64.c
@@ -0,0 +1,105 @@
1#include <linux/init.h>
2#include <linux/smp.h>
3#include <asm/processor.h>
4#include <asm/ptrace.h>
5#include <asm/topology.h>
6#include <asm/numa_64.h>
7
8#include "cpu.h"
9
10static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
11{
12 if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
13 (c->x86 == 0x6 && c->x86_model >= 0x0e))
14 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
15
16 set_cpu_cap(c, X86_FEATURE_SYSENTER32);
17}
18
19/*
20 * find out the number of processor cores on the die
21 */
22static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c)
23{
24 unsigned int eax, t;
25
26 if (c->cpuid_level < 4)
27 return 1;
28
29 cpuid_count(4, 0, &eax, &t, &t, &t);
30
31 if (eax & 0x1f)
32 return ((eax >> 26) + 1);
33 else
34 return 1;
35}
36
37static void __cpuinit srat_detect_node(void)
38{
39#ifdef CONFIG_NUMA
40 unsigned node;
41 int cpu = smp_processor_id();
42 int apicid = hard_smp_processor_id();
43
44 /* For now, don't do the funky fallback heuristics the
45 AMD version employs. */
46 node = apicid_to_node[apicid];
47 if (node == NUMA_NO_NODE || !node_online(node))
48 node = first_node(node_online_map);
49 numa_set_node(cpu, node);
50
51 printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node);
52#endif
53}
54
55static void __cpuinit init_intel(struct cpuinfo_x86 *c)
56{
57 /* Cache sizes */
58 unsigned n;
59
60 init_intel_cacheinfo(c);
61 if (c->cpuid_level > 9) {
62 unsigned eax = cpuid_eax(10);
63 /* Check for version and the number of counters */
64 if ((eax & 0xff) && (((eax>>8) & 0xff) > 1))
65 set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON);
66 }
67
68 if (cpu_has_ds) {
69 unsigned int l1, l2;
70 rdmsr(MSR_IA32_MISC_ENABLE, l1, l2);
71 if (!(l1 & (1<<11)))
72 set_cpu_cap(c, X86_FEATURE_BTS);
73 if (!(l1 & (1<<12)))
74 set_cpu_cap(c, X86_FEATURE_PEBS);
75 }
76
77
78 if (cpu_has_bts)
79 ds_init_intel(c);
80
81 n = c->extended_cpuid_level;
82 if (n >= 0x80000008) {
83 unsigned eax = cpuid_eax(0x80000008);
84 c->x86_virt_bits = (eax >> 8) & 0xff;
85 c->x86_phys_bits = eax & 0xff;
86 }
87
88 if (c->x86 == 15)
89 c->x86_cache_alignment = c->x86_clflush_size * 2;
90 if (c->x86 == 6)
91 set_cpu_cap(c, X86_FEATURE_REP_GOOD);
92 set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
93 c->x86_max_cores = intel_num_cpu_cores(c);
94
95 srat_detect_node();
96}
97
98static struct cpu_dev intel_cpu_dev __cpuinitdata = {
99 .c_vendor = "Intel",
100 .c_ident = { "GenuineIntel" },
101 .c_early_init = early_init_intel,
102 .c_init = init_intel,
103};
104cpu_vendor_dev_register(X86_VENDOR_INTEL, &intel_cpu_dev);
105
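intel_num_cpu_cores() reads CPUID leaf 4 with ECX=0: bits 31:26 of EAX hold cores-per-package minus one, and a zero cache-type field in bits 4:0 means the leaf is not populated. A standalone sketch of that decode, with a hypothetical EAX value:

#include <stdio.h>

int main(void)
{
        unsigned eax = 0x04000121;      /* e.g. a dual-core part */
        unsigned cores = 1;

        if (eax & 0x1f)                 /* cache type != null? */
                cores = (eax >> 26) + 1;

        printf("%u core(s) per package\n", cores);
        return 0;                       /* prints: 2 core(s) per package */
}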
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 26d615dcb149..2c8afafa18e8 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -62,6 +62,7 @@ static struct _cache_table cache_table[] __cpuinitdata =
62 { 0x4b, LVL_3, 8192 }, /* 16-way set assoc, 64 byte line size */ 62 { 0x4b, LVL_3, 8192 }, /* 16-way set assoc, 64 byte line size */
63 { 0x4c, LVL_3, 12288 }, /* 12-way set assoc, 64 byte line size */ 63 { 0x4c, LVL_3, 12288 }, /* 12-way set assoc, 64 byte line size */
64 { 0x4d, LVL_3, 16384 }, /* 16-way set assoc, 64 byte line size */ 64 { 0x4d, LVL_3, 16384 }, /* 16-way set assoc, 64 byte line size */
65 { 0x4e, LVL_2, 6144 }, /* 24-way set assoc, 64 byte line size */
65 { 0x60, LVL_1_DATA, 16 }, /* 8-way set assoc, sectored cache, 64 byte line size */ 66 { 0x60, LVL_1_DATA, 16 }, /* 8-way set assoc, sectored cache, 64 byte line size */
66 { 0x66, LVL_1_DATA, 8 }, /* 4-way set assoc, sectored cache, 64 byte line size */ 67 { 0x66, LVL_1_DATA, 8 }, /* 4-way set assoc, sectored cache, 64 byte line size */
67 { 0x67, LVL_1_DATA, 16 }, /* 4-way set assoc, sectored cache, 64 byte line size */ 68 { 0x67, LVL_1_DATA, 16 }, /* 4-way set assoc, sectored cache, 64 byte line size */
diff --git a/arch/x86/kernel/cpu/mcheck/k7.c b/arch/x86/kernel/cpu/mcheck/k7.c
index e633c9c2b764..f390c9f66351 100644
--- a/arch/x86/kernel/cpu/mcheck/k7.c
+++ b/arch/x86/kernel/cpu/mcheck/k7.c
@@ -9,23 +9,23 @@
9#include <linux/interrupt.h> 9#include <linux/interrupt.h>
10#include <linux/smp.h> 10#include <linux/smp.h>
11 11
12#include <asm/processor.h> 12#include <asm/processor.h>
13#include <asm/system.h> 13#include <asm/system.h>
14#include <asm/msr.h> 14#include <asm/msr.h>
15 15
16#include "mce.h" 16#include "mce.h"
17 17
18/* Machine Check Handler For AMD Athlon/Duron */ 18/* Machine Check Handler For AMD Athlon/Duron */
19static void k7_machine_check(struct pt_regs * regs, long error_code) 19static void k7_machine_check(struct pt_regs *regs, long error_code)
20{ 20{
21 int recover=1; 21 int recover = 1;
22 u32 alow, ahigh, high, low; 22 u32 alow, ahigh, high, low;
23 u32 mcgstl, mcgsth; 23 u32 mcgstl, mcgsth;
24 int i; 24 int i;
25 25
26 rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth); 26 rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
27 if (mcgstl & (1<<0)) /* Recoverable ? */ 27 if (mcgstl & (1<<0)) /* Recoverable ? */
28 recover=0; 28 recover = 0;
29 29
30 printk(KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", 30 printk(KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n",
31 smp_processor_id(), mcgsth, mcgstl); 31 smp_processor_id(), mcgsth, mcgstl);
@@ -60,12 +60,12 @@ static void k7_machine_check(struct pt_regs * regs, long error_code)
60 } 60 }
61 61
62 if (recover&2) 62 if (recover&2)
63 panic ("CPU context corrupt"); 63 panic("CPU context corrupt");
64 if (recover&1) 64 if (recover&1)
65 panic ("Unable to continue"); 65 panic("Unable to continue");
66 printk (KERN_EMERG "Attempting to continue.\n"); 66 printk(KERN_EMERG "Attempting to continue.\n");
67 mcgstl &= ~(1<<2); 67 mcgstl &= ~(1<<2);
68 wrmsr (MSR_IA32_MCG_STATUS,mcgstl, mcgsth); 68 wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
69} 69}
70 70
71 71
@@ -81,25 +81,25 @@ void amd_mcheck_init(struct cpuinfo_x86 *c)
81 machine_check_vector = k7_machine_check; 81 machine_check_vector = k7_machine_check;
82 wmb(); 82 wmb();
83 83
84 printk (KERN_INFO "Intel machine check architecture supported.\n"); 84 printk(KERN_INFO "Intel machine check architecture supported.\n");
85 rdmsr (MSR_IA32_MCG_CAP, l, h); 85 rdmsr(MSR_IA32_MCG_CAP, l, h);
86 if (l & (1<<8)) /* Control register present ? */ 86 if (l & (1<<8)) /* Control register present ? */
87 wrmsr (MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); 87 wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
88 nr_mce_banks = l & 0xff; 88 nr_mce_banks = l & 0xff;
89 89
90 /* Clear status for MC index 0 separately; we don't touch CTL, 90 /* Clear status for MC index 0 separately; we don't touch CTL,
91 * as some K7 Athlons cause spurious MCEs when it's enabled. */ 91 * as some K7 Athlons cause spurious MCEs when it's enabled. */
92 if (boot_cpu_data.x86 == 6) { 92 if (boot_cpu_data.x86 == 6) {
93 wrmsr (MSR_IA32_MC0_STATUS, 0x0, 0x0); 93 wrmsr(MSR_IA32_MC0_STATUS, 0x0, 0x0);
94 i = 1; 94 i = 1;
95 } else 95 } else
96 i = 0; 96 i = 0;
97 for (; i<nr_mce_banks; i++) { 97 for (; i < nr_mce_banks; i++) {
98 wrmsr (MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff); 98 wrmsr(MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff);
99 wrmsr (MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0); 99 wrmsr(MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0);
100 } 100 }
101 101
102 set_in_cr4 (X86_CR4_MCE); 102 set_in_cr4(X86_CR4_MCE);
103 printk (KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n", 103 printk(KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n",
104 smp_processor_id()); 104 smp_processor_id());
105} 105}
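The init loops in k7.c and p4.c step through the machine-check banks by address: each bank owns four consecutive MSRs starting at MSR_IA32_MC0_CTL, so bank i's CTL and STATUS sit at fixed offsets. A small sketch of that addressing; the bank count is hypothetical, the MSR numbers follow the IA-32 layout.

#include <stdio.h>

#define MSR_IA32_MC0_CTL    0x400
#define MSR_IA32_MC0_STATUS 0x401

int main(void)
{
        int i, nr_mce_banks = 4;        /* hypothetical bank count */

        for (i = 0; i < nr_mce_banks; i++)
                printf("bank %d: CTL %#x STATUS %#x\n", i,
                       MSR_IA32_MC0_CTL + 4 * i,
                       MSR_IA32_MC0_STATUS + 4 * i);
        return 0;
}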
diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c
index e07e8c068ae0..501ca1cea27d 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_64.c
@@ -31,7 +31,7 @@
31#include <asm/idle.h> 31#include <asm/idle.h>
32 32
33#define MISC_MCELOG_MINOR 227 33#define MISC_MCELOG_MINOR 227
34#define NR_BANKS 6 34#define NR_SYSFS_BANKS 6
35 35
36atomic_t mce_entry; 36atomic_t mce_entry;
37 37
@@ -46,7 +46,7 @@ static int mce_dont_init;
46 */ 46 */
47static int tolerant = 1; 47static int tolerant = 1;
48static int banks; 48static int banks;
49static unsigned long bank[NR_BANKS] = { [0 ... NR_BANKS-1] = ~0UL }; 49static unsigned long bank[NR_SYSFS_BANKS] = { [0 ... NR_SYSFS_BANKS-1] = ~0UL };
50static unsigned long notify_user; 50static unsigned long notify_user;
51static int rip_msr; 51static int rip_msr;
52static int mce_bootlog = -1; 52static int mce_bootlog = -1;
@@ -209,7 +209,7 @@ void do_machine_check(struct pt_regs * regs, long error_code)
209 barrier(); 209 barrier();
210 210
211 for (i = 0; i < banks; i++) { 211 for (i = 0; i < banks; i++) {
212 if (!bank[i]) 212 if (i < NR_SYSFS_BANKS && !bank[i])
213 continue; 213 continue;
214 214
215 m.misc = 0; 215 m.misc = 0;
@@ -444,9 +444,10 @@ static void mce_init(void *dummy)
444 444
445 rdmsrl(MSR_IA32_MCG_CAP, cap); 445 rdmsrl(MSR_IA32_MCG_CAP, cap);
446 banks = cap & 0xff; 446 banks = cap & 0xff;
447 if (banks > NR_BANKS) { 447 if (banks > MCE_EXTENDED_BANK) {
448 printk(KERN_INFO "MCE: warning: using only %d banks\n", banks); 448 banks = MCE_EXTENDED_BANK;
449 banks = NR_BANKS; 449 printk(KERN_INFO "MCE: warning: using only %d banks\n",
450 MCE_EXTENDED_BANK);
450 } 451 }
451 /* Use accurate RIP reporting if available. */ 452 /* Use accurate RIP reporting if available. */
452 if ((cap & (1<<9)) && ((cap >> 16) & 0xff) >= 9) 453 if ((cap & (1<<9)) && ((cap >> 16) & 0xff) >= 9)
@@ -462,7 +463,11 @@ static void mce_init(void *dummy)
462 wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); 463 wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
463 464
464 for (i = 0; i < banks; i++) { 465 for (i = 0; i < banks; i++) {
465 wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]); 466 if (i < NR_SYSFS_BANKS)
467 wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]);
468 else
469 wrmsrl(MSR_IA32_MC0_CTL+4*i, ~0UL);
470
466 wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); 471 wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
467 } 472 }
468} 473}
@@ -766,7 +771,10 @@ DEFINE_PER_CPU(struct sys_device, device_mce);
766 } \ 771 } \
767 static SYSDEV_ATTR(name, 0644, show_ ## name, set_ ## name); 772 static SYSDEV_ATTR(name, 0644, show_ ## name, set_ ## name);
768 773
769/* TBD should generate these dynamically based on number of available banks */ 774/*
775 * TBD should generate these dynamically based on number of available banks.
776 * Have only 6 control banks in /sysfs until then.
777 */
770ACCESSOR(bank0ctl,bank[0],mce_restart()) 778ACCESSOR(bank0ctl,bank[0],mce_restart())
771ACCESSOR(bank1ctl,bank[1],mce_restart()) 779ACCESSOR(bank1ctl,bank[1],mce_restart())
772ACCESSOR(bank2ctl,bank[2],mce_restart()) 780ACCESSOR(bank2ctl,bank[2],mce_restart())
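The NR_SYSFS_BANKS change decouples the hardware bank count from the six banks exposed through sysfs: banks with a sysfs control honor the stored mask, and the rest are enabled wholesale instead of being cut off at the old NR_BANKS limit. A condensed sketch of that selection; the helper name is illustrative.

#define NR_SYSFS_BANKS 6

/* pick the CTL value to program for bank i: sysfs-controlled banks
 * honor their stored mask, the rest are fully enabled */
unsigned long bank_ctl(int i, const unsigned long *bank)
{
        return i < NR_SYSFS_BANKS ? bank[i] : ~0UL;
}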
diff --git a/arch/x86/kernel/cpu/mcheck/p4.c b/arch/x86/kernel/cpu/mcheck/p4.c
index cb03345554a5..eef001ad3bde 100644
--- a/arch/x86/kernel/cpu/mcheck/p4.c
+++ b/arch/x86/kernel/cpu/mcheck/p4.c
@@ -8,7 +8,7 @@
8#include <linux/interrupt.h> 8#include <linux/interrupt.h>
9#include <linux/smp.h> 9#include <linux/smp.h>
10 10
11#include <asm/processor.h> 11#include <asm/processor.h>
12#include <asm/system.h> 12#include <asm/system.h>
13#include <asm/msr.h> 13#include <asm/msr.h>
14#include <asm/apic.h> 14#include <asm/apic.h>
@@ -32,12 +32,12 @@ struct intel_mce_extended_msrs {
32 /* u32 *reserved[]; */ 32 /* u32 *reserved[]; */
33}; 33};
34 34
35static int mce_num_extended_msrs = 0; 35static int mce_num_extended_msrs;
36 36
37 37
38#ifdef CONFIG_X86_MCE_P4THERMAL 38#ifdef CONFIG_X86_MCE_P4THERMAL
39static void unexpected_thermal_interrupt(struct pt_regs *regs) 39static void unexpected_thermal_interrupt(struct pt_regs *regs)
40{ 40{
41 printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n", 41 printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n",
42 smp_processor_id()); 42 smp_processor_id());
43 add_taint(TAINT_MACHINE_CHECK); 43 add_taint(TAINT_MACHINE_CHECK);
@@ -83,7 +83,7 @@ static void intel_init_thermal(struct cpuinfo_x86 *c)
83 * be some SMM goo which handles it, so we can't even put a handler 83 * be some SMM goo which handles it, so we can't even put a handler
84 * since it might be delivered via SMI already -zwanem. 84 * since it might be delivered via SMI already -zwanem.
85 */ 85 */
86 rdmsr (MSR_IA32_MISC_ENABLE, l, h); 86 rdmsr(MSR_IA32_MISC_ENABLE, l, h);
87 h = apic_read(APIC_LVTTHMR); 87 h = apic_read(APIC_LVTTHMR);
88 if ((l & (1<<3)) && (h & APIC_DM_SMI)) { 88 if ((l & (1<<3)) && (h & APIC_DM_SMI)) {
89 printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n", 89 printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n",
@@ -91,7 +91,7 @@ static void intel_init_thermal(struct cpuinfo_x86 *c)
91 return; /* -EBUSY */ 91 return; /* -EBUSY */
92 } 92 }
93 93
94 /* check whether a vector already exists, temporarily masked? */ 94 /* check whether a vector already exists, temporarily masked? */
95 if (h & APIC_VECTOR_MASK) { 95 if (h & APIC_VECTOR_MASK) {
96 printk(KERN_DEBUG "CPU%d: Thermal LVT vector (%#x) already " 96 printk(KERN_DEBUG "CPU%d: Thermal LVT vector (%#x) already "
97 "installed\n", 97 "installed\n",
@@ -104,18 +104,18 @@ static void intel_init_thermal(struct cpuinfo_x86 *c)
104 h |= (APIC_DM_FIXED | APIC_LVT_MASKED); /* we'll mask till we're ready */ 104 h |= (APIC_DM_FIXED | APIC_LVT_MASKED); /* we'll mask till we're ready */
105 apic_write_around(APIC_LVTTHMR, h); 105 apic_write_around(APIC_LVTTHMR, h);
106 106
107 rdmsr (MSR_IA32_THERM_INTERRUPT, l, h); 107 rdmsr(MSR_IA32_THERM_INTERRUPT, l, h);
108 wrmsr (MSR_IA32_THERM_INTERRUPT, l | 0x03 , h); 108 wrmsr(MSR_IA32_THERM_INTERRUPT, l | 0x03, h);
109 109
110 /* ok we're good to go... */ 110 /* ok we're good to go... */
111 vendor_thermal_interrupt = intel_thermal_interrupt; 111 vendor_thermal_interrupt = intel_thermal_interrupt;
112
113 rdmsr (MSR_IA32_MISC_ENABLE, l, h);
114 wrmsr (MSR_IA32_MISC_ENABLE, l | (1<<3), h);
115 112
116 l = apic_read (APIC_LVTTHMR); 113 rdmsr(MSR_IA32_MISC_ENABLE, l, h);
117 apic_write_around (APIC_LVTTHMR, l & ~APIC_LVT_MASKED); 114 wrmsr(MSR_IA32_MISC_ENABLE, l | (1<<3), h);
118 printk (KERN_INFO "CPU%d: Thermal monitoring enabled\n", cpu); 115
116 l = apic_read(APIC_LVTTHMR);
117 apic_write_around(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
118 printk(KERN_INFO "CPU%d: Thermal monitoring enabled\n", cpu);
119 119
120 /* enable thermal throttle processing */ 120 /* enable thermal throttle processing */
121 atomic_set(&therm_throt_en, 1); 121 atomic_set(&therm_throt_en, 1);
@@ -129,28 +129,28 @@ static inline void intel_get_extended_msrs(struct intel_mce_extended_msrs *r)
129{ 129{
130 u32 h; 130 u32 h;
131 131
132 rdmsr (MSR_IA32_MCG_EAX, r->eax, h); 132 rdmsr(MSR_IA32_MCG_EAX, r->eax, h);
133 rdmsr (MSR_IA32_MCG_EBX, r->ebx, h); 133 rdmsr(MSR_IA32_MCG_EBX, r->ebx, h);
134 rdmsr (MSR_IA32_MCG_ECX, r->ecx, h); 134 rdmsr(MSR_IA32_MCG_ECX, r->ecx, h);
135 rdmsr (MSR_IA32_MCG_EDX, r->edx, h); 135 rdmsr(MSR_IA32_MCG_EDX, r->edx, h);
136 rdmsr (MSR_IA32_MCG_ESI, r->esi, h); 136 rdmsr(MSR_IA32_MCG_ESI, r->esi, h);
137 rdmsr (MSR_IA32_MCG_EDI, r->edi, h); 137 rdmsr(MSR_IA32_MCG_EDI, r->edi, h);
138 rdmsr (MSR_IA32_MCG_EBP, r->ebp, h); 138 rdmsr(MSR_IA32_MCG_EBP, r->ebp, h);
139 rdmsr (MSR_IA32_MCG_ESP, r->esp, h); 139 rdmsr(MSR_IA32_MCG_ESP, r->esp, h);
140 rdmsr (MSR_IA32_MCG_EFLAGS, r->eflags, h); 140 rdmsr(MSR_IA32_MCG_EFLAGS, r->eflags, h);
141 rdmsr (MSR_IA32_MCG_EIP, r->eip, h); 141 rdmsr(MSR_IA32_MCG_EIP, r->eip, h);
142} 142}
143 143
144static void intel_machine_check(struct pt_regs * regs, long error_code) 144static void intel_machine_check(struct pt_regs *regs, long error_code)
145{ 145{
146 int recover=1; 146 int recover = 1;
147 u32 alow, ahigh, high, low; 147 u32 alow, ahigh, high, low;
148 u32 mcgstl, mcgsth; 148 u32 mcgstl, mcgsth;
149 int i; 149 int i;
150 150
151 rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth); 151 rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
152 if (mcgstl & (1<<0)) /* Recoverable ? */ 152 if (mcgstl & (1<<0)) /* Recoverable ? */
153 recover=0; 153 recover = 0;
154 154
155 printk(KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", 155 printk(KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n",
156 smp_processor_id(), mcgsth, mcgstl); 156 smp_processor_id(), mcgsth, mcgstl);
@@ -191,20 +191,20 @@ static void intel_machine_check(struct pt_regs * regs, long error_code)
191 } 191 }
192 192
193 if (recover & 2) 193 if (recover & 2)
194 panic ("CPU context corrupt"); 194 panic("CPU context corrupt");
195 if (recover & 1) 195 if (recover & 1)
196 panic ("Unable to continue"); 196 panic("Unable to continue");
197 197
198 printk(KERN_EMERG "Attempting to continue.\n"); 198 printk(KERN_EMERG "Attempting to continue.\n");
199 /* 199 /*
200 * Do not clear the MSR_IA32_MCi_STATUS if the error is not 200 * Do not clear the MSR_IA32_MCi_STATUS if the error is not
201 * recoverable/continuable. This will allow BIOS to look at the MSRs 201 * recoverable/continuable. This will allow BIOS to look at the MSRs
202 * for errors if the OS could not log the error. 202 * for errors if the OS could not log the error.
203 */ 203 */
204 for (i=0; i<nr_mce_banks; i++) { 204 for (i = 0; i < nr_mce_banks; i++) {
205 u32 msr; 205 u32 msr;
206 msr = MSR_IA32_MC0_STATUS+i*4; 206 msr = MSR_IA32_MC0_STATUS+i*4;
207 rdmsr (msr, low, high); 207 rdmsr(msr, low, high);
208 if (high&(1<<31)) { 208 if (high&(1<<31)) {
209 /* Clear it */ 209 /* Clear it */
210 wrmsr(msr, 0UL, 0UL); 210 wrmsr(msr, 0UL, 0UL);
@@ -214,7 +214,7 @@ static void intel_machine_check(struct pt_regs * regs, long error_code)
214 } 214 }
215 } 215 }
216 mcgstl &= ~(1<<2); 216 mcgstl &= ~(1<<2);
217 wrmsr (MSR_IA32_MCG_STATUS,mcgstl, mcgsth); 217 wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
218} 218}
219 219
220 220
@@ -222,30 +222,30 @@ void intel_p4_mcheck_init(struct cpuinfo_x86 *c)
222{ 222{
223 u32 l, h; 223 u32 l, h;
224 int i; 224 int i;
225 225
226 machine_check_vector = intel_machine_check; 226 machine_check_vector = intel_machine_check;
227 wmb(); 227 wmb();
228 228
229 printk (KERN_INFO "Intel machine check architecture supported.\n"); 229 printk(KERN_INFO "Intel machine check architecture supported.\n");
230 rdmsr (MSR_IA32_MCG_CAP, l, h); 230 rdmsr(MSR_IA32_MCG_CAP, l, h);
231 if (l & (1<<8)) /* Control register present ? */ 231 if (l & (1<<8)) /* Control register present ? */
232 wrmsr (MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); 232 wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
233 nr_mce_banks = l & 0xff; 233 nr_mce_banks = l & 0xff;
234 234
235 for (i=0; i<nr_mce_banks; i++) { 235 for (i = 0; i < nr_mce_banks; i++) {
236 wrmsr (MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff); 236 wrmsr(MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff);
237 wrmsr (MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0); 237 wrmsr(MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0);
238 } 238 }
239 239
240 set_in_cr4 (X86_CR4_MCE); 240 set_in_cr4(X86_CR4_MCE);
241 printk (KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n", 241 printk(KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n",
242 smp_processor_id()); 242 smp_processor_id());
243 243
244 /* Check for P4/Xeon extended MCE MSRs */ 244 /* Check for P4/Xeon extended MCE MSRs */
245 rdmsr (MSR_IA32_MCG_CAP, l, h); 245 rdmsr(MSR_IA32_MCG_CAP, l, h);
246 if (l & (1<<9)) {/* MCG_EXT_P */ 246 if (l & (1<<9)) {/* MCG_EXT_P */
247 mce_num_extended_msrs = (l >> 16) & 0xff; 247 mce_num_extended_msrs = (l >> 16) & 0xff;
248 printk (KERN_INFO "CPU%d: Intel P4/Xeon Extended MCE MSRs (%d)" 248 printk(KERN_INFO "CPU%d: Intel P4/Xeon Extended MCE MSRs (%d)"
249 " available\n", 249 " available\n",
250 smp_processor_id(), mce_num_extended_msrs); 250 smp_processor_id(), mce_num_extended_msrs);
251 251
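A minimal sketch of the MSR_IA32_MCG_CAP layout that the init code above relies on (bank count in bits 7:0, MCG_CTL_P in bit 8, MCG_EXT_P in bit 9, extended MSR count in bits 23:16). The struct and decode_mcg_cap() helper are illustrative only, not part of the patch:

	#include <stdint.h>

	/* Fields of MSR_IA32_MCG_CAP tested by intel_p4_mcheck_init() above. */
	struct mcg_cap_info {
		unsigned int nr_banks;		/* bits 7:0  - number of MCE banks */
		int ctl_present;		/* bit 8     - MCG_CTL register present */
		int ext_present;		/* bit 9     - MCG_EXT_P, extended MSRs */
		unsigned int nr_ext_msrs;	/* bits 23:16 - extended MSR count */
	};

	static inline struct mcg_cap_info decode_mcg_cap(uint32_t l)
	{
		struct mcg_cap_info info;

		info.nr_banks = l & 0xff;
		info.ctl_present = !!(l & (1 << 8));
		info.ext_present = !!(l & (1 << 9));
		info.nr_ext_msrs = (l >> 16) & 0xff;
		return info;
	}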
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 5d241ce94a44..509bd3d9eacd 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -37,7 +37,7 @@ static struct fixed_range_block fixed_range_blocks[] = {
37static unsigned long smp_changes_mask; 37static unsigned long smp_changes_mask;
38static struct mtrr_state mtrr_state = {}; 38static struct mtrr_state mtrr_state = {};
39static int mtrr_state_set; 39static int mtrr_state_set;
40static u64 tom2; 40u64 mtrr_tom2;
41 41
42#undef MODULE_PARAM_PREFIX 42#undef MODULE_PARAM_PREFIX
43#define MODULE_PARAM_PREFIX "mtrr." 43#define MODULE_PARAM_PREFIX "mtrr."
@@ -139,8 +139,8 @@ u8 mtrr_type_lookup(u64 start, u64 end)
139 } 139 }
140 } 140 }
141 141
142 if (tom2) { 142 if (mtrr_tom2) {
143 if (start >= (1ULL<<32) && (end < tom2)) 143 if (start >= (1ULL<<32) && (end < mtrr_tom2))
144 return MTRR_TYPE_WRBACK; 144 return MTRR_TYPE_WRBACK;
145 } 145 }
146 146
@@ -158,6 +158,20 @@ get_mtrr_var_range(unsigned int index, struct mtrr_var_range *vr)
158 rdmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi); 158 rdmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi);
159} 159}
160 160
161/* fill the MSR pair relating to a var range */
162void fill_mtrr_var_range(unsigned int index,
163 u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi)
164{
165 struct mtrr_var_range *vr;
166
167 vr = mtrr_state.var_ranges;
168
169 vr[index].base_lo = base_lo;
170 vr[index].base_hi = base_hi;
171 vr[index].mask_lo = mask_lo;
172 vr[index].mask_hi = mask_hi;
173}
174
161static void 175static void
162get_fixed_ranges(mtrr_type * frs) 176get_fixed_ranges(mtrr_type * frs)
163{ 177{
@@ -213,13 +227,13 @@ void __init get_mtrr_state(void)
213 mtrr_state.enabled = (lo & 0xc00) >> 10; 227 mtrr_state.enabled = (lo & 0xc00) >> 10;
214 228
215 if (amd_special_default_mtrr()) { 229 if (amd_special_default_mtrr()) {
216 unsigned lo, hi; 230 unsigned low, high;
217 /* TOP_MEM2 */ 231 /* TOP_MEM2 */
218 rdmsr(MSR_K8_TOP_MEM2, lo, hi); 232 rdmsr(MSR_K8_TOP_MEM2, low, high);
219 tom2 = hi; 233 mtrr_tom2 = high;
220 tom2 <<= 32; 234 mtrr_tom2 <<= 32;
221 tom2 |= lo; 235 mtrr_tom2 |= low;
222 tom2 &= 0xffffff8000000ULL; 236 mtrr_tom2 &= 0xffffff800000ULL;
223 } 237 }
224 if (mtrr_show) { 238 if (mtrr_show) {
225 int high_width; 239 int high_width;
@@ -251,9 +265,9 @@ void __init get_mtrr_state(void)
251 else 265 else
252 printk(KERN_INFO "MTRR %u disabled\n", i); 266 printk(KERN_INFO "MTRR %u disabled\n", i);
253 } 267 }
254 if (tom2) { 268 if (mtrr_tom2) {
255 printk(KERN_INFO "TOM2: %016llx aka %lldM\n", 269 printk(KERN_INFO "TOM2: %016llx aka %lldM\n",
256 tom2, tom2>>20); 270 mtrr_tom2, mtrr_tom2>>20);
257 } 271 }
258 } 272 }
259 mtrr_state_set = 1; 273 mtrr_state_set = 1;
@@ -328,7 +342,7 @@ static void set_fixed_range(int msr, bool *changed, unsigned int *msrwords)
328 342
329 if (lo != msrwords[0] || hi != msrwords[1]) { 343 if (lo != msrwords[0] || hi != msrwords[1]) {
330 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD && 344 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
331 boot_cpu_data.x86 == 15 && 345 (boot_cpu_data.x86 >= 0x0f && boot_cpu_data.x86 <= 0x11) &&
332 ((msrwords[0] | msrwords[1]) & K8_MTRR_RDMEM_WRMEM_MASK)) 346 ((msrwords[0] | msrwords[1]) & K8_MTRR_RDMEM_WRMEM_MASK))
333 k8_enable_fixed_iorrs(); 347 k8_enable_fixed_iorrs();
334 mtrr_wrmsr(msr, msrwords[0], msrwords[1]); 348 mtrr_wrmsr(msr, msrwords[0], msrwords[1]);
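The TOM2 handling in get_mtrr_state() above composes a 64-bit address from the two 32-bit halves returned by rdmsr() and masks off the low bits; a self-contained sketch of the same arithmetic (compose_tom2() is a made-up name, not kernel code):

	#include <stdint.h>

	/* Compose the 64-bit top-of-memory-2 value from the MSR_K8_TOP_MEM2
	 * halves, masked the same way the patch does. */
	static inline uint64_t compose_tom2(uint32_t low, uint32_t high)
	{
		uint64_t tom2 = high;

		tom2 <<= 32;
		tom2 |= low;
		return tom2 & 0xffffff800000ULL;	/* keep bits 47:23 */
	}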
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 6a1e278d9323..105afe12beb0 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -37,6 +37,7 @@
37#include <linux/smp.h> 37#include <linux/smp.h>
38#include <linux/cpu.h> 38#include <linux/cpu.h>
39#include <linux/mutex.h> 39#include <linux/mutex.h>
40#include <linux/sort.h>
40 41
41#include <asm/e820.h> 42#include <asm/e820.h>
42#include <asm/mtrr.h> 43#include <asm/mtrr.h>
@@ -609,6 +610,787 @@ static struct sysdev_driver mtrr_sysdev_driver = {
609 .resume = mtrr_restore, 610 .resume = mtrr_restore,
610}; 611};
611 612
613/* should be related to the number of MTRR_VAR_RANGES */
614#define RANGE_NUM 256
615
616struct res_range {
617 unsigned long start;
618 unsigned long end;
619};
620
621static int __init
622add_range(struct res_range *range, int nr_range, unsigned long start,
623 unsigned long end)
624{
625 /* out of slots */
626 if (nr_range >= RANGE_NUM)
627 return nr_range;
628
629 range[nr_range].start = start;
630 range[nr_range].end = end;
631
632 nr_range++;
633
634 return nr_range;
635}
636
637static int __init
638add_range_with_merge(struct res_range *range, int nr_range, unsigned long start,
639 unsigned long end)
640{
641 int i;
642
643 /* try to merge it with an existing one */
644 for (i = 0; i < nr_range; i++) {
645 unsigned long final_start, final_end;
646 unsigned long common_start, common_end;
647
648 if (!range[i].end)
649 continue;
650
651 common_start = max(range[i].start, start);
652 common_end = min(range[i].end, end);
653 if (common_start > common_end + 1)
654 continue;
655
656 final_start = min(range[i].start, start);
657 final_end = max(range[i].end, end);
658
659 range[i].start = final_start;
660 range[i].end = final_end;
661 return nr_range;
662 }
663
664 /* no merge was possible - add a new range */
665 return add_range(range, nr_range, start, end);
666}
667
668static void __init
669subtract_range(struct res_range *range, unsigned long start, unsigned long end)
670{
671 int i, j;
672
673 for (j = 0; j < RANGE_NUM; j++) {
674 if (!range[j].end)
675 continue;
676
677 if (start <= range[j].start && end >= range[j].end) {
678 range[j].start = 0;
679 range[j].end = 0;
680 continue;
681 }
682
683 if (start <= range[j].start && end < range[j].end &&
684 range[j].start < end + 1) {
685 range[j].start = end + 1;
686 continue;
687 }
688
689
690 if (start > range[j].start && end >= range[j].end &&
691 range[j].end > start - 1) {
692 range[j].end = start - 1;
693 continue;
694 }
695
696 if (start > range[j].start && end < range[j].end) {
697 /* find the new spare */
698 for (i = 0; i < RANGE_NUM; i++) {
699 if (range[i].end == 0)
700 break;
701 }
702 if (i < RANGE_NUM) {
703 range[i].end = range[j].end;
704 range[i].start = end + 1;
705 } else {
706 printk(KERN_ERR "ran out of slots in ranges\n");
707 }
708 range[j].end = start - 1;
709 continue;
710 }
711 }
712}
713
714static int __init cmp_range(const void *x1, const void *x2)
715{
716 const struct res_range *r1 = x1;
717 const struct res_range *r2 = x2;
718 long start1, start2;
719
720 start1 = r1->start;
721 start2 = r2->start;
722
723 return start1 - start2;
724}
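The merge rule used by add_range_with_merge() above is that two inclusive [start, end] ranges combine when they overlap or are exactly adjacent. A small standalone restatement of just that rule (ranges_mergeable() is illustrative, not kernel code):

	#include <stdio.h>

	/* Mirror of struct res_range: inclusive [start, end] page ranges. */
	struct res_range { unsigned long start, end; };

	/* Two ranges merge when they overlap or touch, exactly as the
	 * common_start/common_end test in add_range_with_merge() decides. */
	static int ranges_mergeable(struct res_range a, struct res_range b)
	{
		unsigned long common_start = a.start > b.start ? a.start : b.start;
		unsigned long common_end   = a.end   < b.end   ? a.end   : b.end;

		return !(common_start > common_end + 1);
	}

	int main(void)
	{
		struct res_range a = { 0x000, 0x3ff }, b = { 0x400, 0x7ff };

		/* adjacent ranges are mergeable: prints 1 */
		printf("mergeable: %d\n", ranges_mergeable(a, b));
		return 0;
	}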
725
726struct var_mtrr_range_state {
727 unsigned long base_pfn;
728 unsigned long size_pfn;
729 mtrr_type type;
730};
731
732struct var_mtrr_range_state __initdata range_state[RANGE_NUM];
733static int __initdata debug_print;
734
735static int __init
736x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
737 unsigned long extra_remove_base,
738 unsigned long extra_remove_size)
739{
740 unsigned long i, base, size;
741 mtrr_type type;
742
743 for (i = 0; i < num_var_ranges; i++) {
744 type = range_state[i].type;
745 if (type != MTRR_TYPE_WRBACK)
746 continue;
747 base = range_state[i].base_pfn;
748 size = range_state[i].size_pfn;
749 nr_range = add_range_with_merge(range, nr_range, base,
750 base + size - 1);
751 }
752 if (debug_print) {
753 printk(KERN_DEBUG "After WB checking\n");
754 for (i = 0; i < nr_range; i++)
755 printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
756 range[i].start, range[i].end + 1);
757 }
758
759 /* take out UC ranges */
760 for (i = 0; i < num_var_ranges; i++) {
761 type = range_state[i].type;
762 if (type != MTRR_TYPE_UNCACHABLE)
763 continue;
764 size = range_state[i].size_pfn;
765 if (!size)
766 continue;
767 base = range_state[i].base_pfn;
768 subtract_range(range, base, base + size - 1);
769 }
770 if (extra_remove_size)
771 subtract_range(range, extra_remove_base,
772 extra_remove_base + extra_remove_size - 1);
773
774 /* get new range num */
775 nr_range = 0;
776 for (i = 0; i < RANGE_NUM; i++) {
777 if (!range[i].end)
778 continue;
779 nr_range++;
780 }
781 if (debug_print) {
782 printk(KERN_DEBUG "After UC checking\n");
783 for (i = 0; i < nr_range; i++)
784 printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
785 range[i].start, range[i].end + 1);
786 }
787
788 /* sort the ranges */
789 sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL);
790 if (debug_print) {
791 printk(KERN_DEBUG "After sorting\n");
792 for (i = 0; i < nr_range; i++)
793 printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
794 range[i].start, range[i].end + 1);
795 }
796
797 /* clear the entries that are not used */
798 for (i = nr_range; i < RANGE_NUM; i++)
799 memset(&range[i], 0, sizeof(range[i]));
800
801 return nr_range;
802}
803
804static struct res_range __initdata range[RANGE_NUM];
805
806#ifdef CONFIG_MTRR_SANITIZER
807
808static unsigned long __init sum_ranges(struct res_range *range, int nr_range)
809{
810 unsigned long sum;
811 int i;
812
813 sum = 0;
814 for (i = 0; i < nr_range; i++)
815 sum += range[i].end + 1 - range[i].start;
816
817 return sum;
818}
819
820static int enable_mtrr_cleanup __initdata =
821 CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT;
822
823static int __init disable_mtrr_cleanup_setup(char *str)
824{
825 if (enable_mtrr_cleanup != -1)
826 enable_mtrr_cleanup = 0;
827 return 0;
828}
829early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup);
830
831static int __init enable_mtrr_cleanup_setup(char *str)
832{
833 if (enable_mtrr_cleanup != -1)
834 enable_mtrr_cleanup = 1;
835 return 0;
836}
837early_param("enable_mtrr_cleanup", enable_mtrr_cleanup_setup);
838
839struct var_mtrr_state {
840 unsigned long range_startk;
841 unsigned long range_sizek;
842 unsigned long chunk_sizek;
843 unsigned long gran_sizek;
844 unsigned int reg;
845};
846
847static void __init
848set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek,
849 unsigned char type, unsigned int address_bits)
850{
851 u32 base_lo, base_hi, mask_lo, mask_hi;
852 u64 base, mask;
853
854 if (!sizek) {
855 fill_mtrr_var_range(reg, 0, 0, 0, 0);
856 return;
857 }
858
859 mask = (1ULL << address_bits) - 1;
860 mask &= ~((((u64)sizek) << 10) - 1);
861
862 base = ((u64)basek) << 10;
863
864 base |= type;
865 mask |= 0x800;
866
867 base_lo = base & ((1ULL<<32) - 1);
868 base_hi = base >> 32;
869
870 mask_lo = mask & ((1ULL<<32) - 1);
871 mask_hi = mask >> 32;
872
873 fill_mtrr_var_range(reg, base_lo, base_hi, mask_lo, mask_hi);
874}
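A worked example of the PhysBase/PhysMask encoding performed by set_var_mtrr() above. mtrr_encode() below is a hypothetical standalone restatement of the same shifts and masks, and the 1GB/256MB/36-bit values are purely illustrative:

	#include <stdint.h>
	#include <stdio.h>

	#define MTRR_TYPE_WRBACK 6	/* memory type encoding, as in the kernel */

	/* Recompute the MSR pair the way set_var_mtrr() does. */
	static void mtrr_encode(uint64_t basek, uint64_t sizek,
				unsigned char type, unsigned int address_bits,
				uint64_t *physbase, uint64_t *physmask)
	{
		uint64_t mask = (1ULL << address_bits) - 1;

		mask &= ~((sizek << 10) - 1);	/* size must be a power of two */
		*physbase = (basek << 10) | type;
		*physmask = mask | 0x800;	/* bit 11: mask valid/enable */
	}

	int main(void)
	{
		uint64_t base, mask;

		/* 256MB of write-back starting at 1GB on a 36-bit machine */
		mtrr_encode(1024 * 1024, 256 * 1024, MTRR_TYPE_WRBACK, 36,
			    &base, &mask);
		printf("PhysBase=%#llx PhysMask=%#llx\n",
		       (unsigned long long)base, (unsigned long long)mask);
		/* prints PhysBase=0x40000006 PhysMask=0xff0000800 */
		return 0;
	}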
875
876static void __init
877save_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek,
878 unsigned char type)
879{
880 range_state[reg].base_pfn = basek >> (PAGE_SHIFT - 10);
881 range_state[reg].size_pfn = sizek >> (PAGE_SHIFT - 10);
882 range_state[reg].type = type;
883}
884
885static void __init
886set_var_mtrr_all(unsigned int address_bits)
887{
888 unsigned long basek, sizek;
889 unsigned char type;
890 unsigned int reg;
891
892 for (reg = 0; reg < num_var_ranges; reg++) {
893 basek = range_state[reg].base_pfn << (PAGE_SHIFT - 10);
894 sizek = range_state[reg].size_pfn << (PAGE_SHIFT - 10);
895 type = range_state[reg].type;
896
897 set_var_mtrr(reg, basek, sizek, type, address_bits);
898 }
899}
900
901static unsigned int __init
902range_to_mtrr(unsigned int reg, unsigned long range_startk,
903 unsigned long range_sizek, unsigned char type)
904{
905 if (!range_sizek || (reg >= num_var_ranges))
906 return reg;
907
908 while (range_sizek) {
909 unsigned long max_align, align;
910 unsigned long sizek;
911
912 /* Compute the maximum size I can make a range */
913 if (range_startk)
914 max_align = ffs(range_startk) - 1;
915 else
916 max_align = 32;
917 align = fls(range_sizek) - 1;
918 if (align > max_align)
919 align = max_align;
920
921 sizek = 1 << align;
922 if (debug_print)
923 printk(KERN_DEBUG "Setting variable MTRR %d, "
924 "base: %ldMB, range: %ldMB, type %s\n",
925 reg, range_startk >> 10, sizek >> 10,
926 (type == MTRR_TYPE_UNCACHABLE)?"UC":
927 ((type == MTRR_TYPE_WRBACK)?"WB":"Other")
928 );
929 save_var_mtrr(reg++, range_startk, sizek, type);
930 range_startk += sizek;
931 range_sizek -= sizek;
932 if (reg >= num_var_ranges)
933 break;
934 }
935 return reg;
936}
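The splitting loop in range_to_mtrr() above bounds each block by both the alignment of the start (via ffs) and the largest power of two that still fits (via fls). A standalone trace of the same arithmetic; fls_ul() is a stand-in for the kernel's fls(), and the 1GB-base/1.5GB-size figures are illustrative:

	#include <stdio.h>
	#include <strings.h>	/* ffs() */

	static int fls_ul(unsigned long x)	/* simple fls() stand-in */
	{
		int r = 0;
		while (x) { x >>= 1; r++; }
		return r;
	}

	int main(void)
	{
		unsigned long startk = 1024 * 1024;	/* 1GB, in KB */
		unsigned long sizek  = 1536 * 1024;	/* 1.5GB, in KB */

		while (sizek) {
			unsigned long max_align = startk ? ffs(startk) - 1 : 32;
			unsigned long align = fls_ul(sizek) - 1;
			unsigned long blockk;

			if (align > max_align)
				align = max_align;
			blockk = 1UL << align;
			/* prints a 1024MB block at 1024MB, then 512MB at 2048MB */
			printf("block at %luMB, size %luMB\n",
			       startk >> 10, blockk >> 10);
			startk += blockk;
			sizek  -= blockk;
		}
		return 0;
	}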
937
938static unsigned __init
939range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek,
940 unsigned long sizek)
941{
942 unsigned long hole_basek, hole_sizek;
943 unsigned long second_basek, second_sizek;
944 unsigned long range0_basek, range0_sizek;
945 unsigned long range_basek, range_sizek;
946 unsigned long chunk_sizek;
947 unsigned long gran_sizek;
948
949 hole_basek = 0;
950 hole_sizek = 0;
951 second_basek = 0;
952 second_sizek = 0;
953 chunk_sizek = state->chunk_sizek;
954 gran_sizek = state->gran_sizek;
955
956 /* align to the granule size to keep small blocks from using up MTRRs */
957 range_basek = ALIGN(state->range_startk, gran_sizek);
958 if ((range_basek > basek) && basek)
959 return second_sizek;
960 state->range_sizek -= (range_basek - state->range_startk);
961 range_sizek = ALIGN(state->range_sizek, gran_sizek);
962
963 while (range_sizek > state->range_sizek) {
964 range_sizek -= gran_sizek;
965 if (!range_sizek)
966 return 0;
967 }
968 state->range_sizek = range_sizek;
969
970 /* try to append some small hole */
971 range0_basek = state->range_startk;
972 range0_sizek = ALIGN(state->range_sizek, chunk_sizek);
973 if (range0_sizek == state->range_sizek) {
974 if (debug_print)
975 printk(KERN_DEBUG "rangeX: %016lx - %016lx\n",
976 range0_basek<<10,
977 (range0_basek + state->range_sizek)<<10);
978 state->reg = range_to_mtrr(state->reg, range0_basek,
979 state->range_sizek, MTRR_TYPE_WRBACK);
980 return 0;
981 }
982
983 range0_sizek -= chunk_sizek;
984 if (range0_sizek && sizek) {
985 while (range0_basek + range0_sizek > (basek + sizek)) {
986 range0_sizek -= chunk_sizek;
987 if (!range0_sizek)
988 break;
989 }
990 }
991
992 if (range0_sizek) {
993 if (debug_print)
994 printk(KERN_DEBUG "range0: %016lx - %016lx\n",
995 range0_basek<<10,
996 (range0_basek + range0_sizek)<<10);
997 state->reg = range_to_mtrr(state->reg, range0_basek,
998 range0_sizek, MTRR_TYPE_WRBACK);
999
1000 }
1001
1002 range_basek = range0_basek + range0_sizek;
1003 range_sizek = chunk_sizek;
1004
1005 if (range_basek + range_sizek > basek &&
1006 range_basek + range_sizek <= (basek + sizek)) {
1007 /* one hole */
1008 second_basek = basek;
1009 second_sizek = range_basek + range_sizek - basek;
1010 }
1011
1012 /* if this is the last piece, there can only be one hole near the end */
1013 if ((second_basek || !basek) &&
1014 range_sizek - (state->range_sizek - range0_sizek) - second_sizek <
1015 (chunk_sizek >> 1)) {
1016 /*
1017 * one hole in the middle (second_sizek is 0) or at the end
1018 * (second_sizek is non-zero)
1019 */
1020 hole_sizek = range_sizek - (state->range_sizek - range0_sizek)
1021 - second_sizek;
1022 hole_basek = range_basek + range_sizek - hole_sizek
1023 - second_sizek;
1024 } else {
1025 /* fallback for big hole, or several holes */
1026 range_sizek = state->range_sizek - range0_sizek;
1027 second_basek = 0;
1028 second_sizek = 0;
1029 }
1030
1031 if (debug_print)
1032 printk(KERN_DEBUG "range: %016lx - %016lx\n", range_basek<<10,
1033 (range_basek + range_sizek)<<10);
1034 state->reg = range_to_mtrr(state->reg, range_basek, range_sizek,
1035 MTRR_TYPE_WRBACK);
1036 if (hole_sizek) {
1037 if (debug_print)
1038 printk(KERN_DEBUG "hole: %016lx - %016lx\n",
1039 hole_basek<<10, (hole_basek + hole_sizek)<<10);
1040 state->reg = range_to_mtrr(state->reg, hole_basek, hole_sizek,
1041 MTRR_TYPE_UNCACHABLE);
1042
1043 }
1044
1045 return second_sizek;
1046}
1047
1048static void __init
1049set_var_mtrr_range(struct var_mtrr_state *state, unsigned long base_pfn,
1050 unsigned long size_pfn)
1051{
1052 unsigned long basek, sizek;
1053 unsigned long second_sizek = 0;
1054
1055 if (state->reg >= num_var_ranges)
1056 return;
1057
1058 basek = base_pfn << (PAGE_SHIFT - 10);
1059 sizek = size_pfn << (PAGE_SHIFT - 10);
1060
1061 /* See if I can merge with the last range */
1062 if ((basek <= 1024) ||
1063 (state->range_startk + state->range_sizek == basek)) {
1064 unsigned long endk = basek + sizek;
1065 state->range_sizek = endk - state->range_startk;
1066 return;
1067 }
1068 /* Write the range mtrrs */
1069 if (state->range_sizek != 0)
1070 second_sizek = range_to_mtrr_with_hole(state, basek, sizek);
1071
1072 /* Allocate an msr */
1073 state->range_startk = basek + second_sizek;
1074 state->range_sizek = sizek - second_sizek;
1075}
1076
1077/* minimum size of an MTRR block that can take a hole */
1078static u64 mtrr_chunk_size __initdata = (256ULL<<20);
1079
1080static int __init parse_mtrr_chunk_size_opt(char *p)
1081{
1082 if (!p)
1083 return -EINVAL;
1084 mtrr_chunk_size = memparse(p, &p);
1085 return 0;
1086}
1087early_param("mtrr_chunk_size", parse_mtrr_chunk_size_opt);
1088
1089/* granularity of an MTRR block */
1090static u64 mtrr_gran_size __initdata;
1091
1092static int __init parse_mtrr_gran_size_opt(char *p)
1093{
1094 if (!p)
1095 return -EINVAL;
1096 mtrr_gran_size = memparse(p, &p);
1097 return 0;
1098}
1099early_param("mtrr_gran_size", parse_mtrr_gran_size_opt);
1100
1101static int nr_mtrr_spare_reg __initdata =
1102 CONFIG_MTRR_SANITIZER_SPARE_REG_NR_DEFAULT;
1103
1104static int __init parse_mtrr_spare_reg(char *arg)
1105{
1106 if (arg)
1107 nr_mtrr_spare_reg = simple_strtoul(arg, NULL, 0);
1108 return 0;
1109}
1110
1111early_param("mtrr_spare_reg_nr", parse_mtrr_spare_reg);
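Taken together these are boot-time knobs; memparse() accepts the usual K/M/G suffixes, so a command line such as "mtrr_gran_size=64M mtrr_chunk_size=256M mtrr_spare_reg_nr=1" (illustrative values, not a recommendation) would set the granule size, the chunk size, and the number of variable MTRRs to keep free for the cleanup pass below.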
1112
1113static int __init
1114x86_setup_var_mtrrs(struct res_range *range, int nr_range,
1115 u64 chunk_size, u64 gran_size)
1116{
1117 struct var_mtrr_state var_state;
1118 int i;
1119 int num_reg;
1120
1121 var_state.range_startk = 0;
1122 var_state.range_sizek = 0;
1123 var_state.reg = 0;
1124 var_state.chunk_sizek = chunk_size >> 10;
1125 var_state.gran_sizek = gran_size >> 10;
1126
1127 memset(range_state, 0, sizeof(range_state));
1128
1129 /* Write the range etc */
1130 for (i = 0; i < nr_range; i++)
1131 set_var_mtrr_range(&var_state, range[i].start,
1132 range[i].end - range[i].start + 1);
1133
1134 /* Write the last range */
1135 if (var_state.range_sizek != 0)
1136 range_to_mtrr_with_hole(&var_state, 0, 0);
1137
1138 num_reg = var_state.reg;
1139 /* Clear out the extra MTRR's */
1140 while (var_state.reg < num_var_ranges) {
1141 save_var_mtrr(var_state.reg, 0, 0, 0);
1142 var_state.reg++;
1143 }
1144
1145 return num_reg;
1146}
1147
1148struct mtrr_cleanup_result {
1149 unsigned long gran_sizek;
1150 unsigned long chunk_sizek;
1151 unsigned long lose_cover_sizek;
1152 unsigned int num_reg;
1153 int bad;
1154};
1155
1156/*
1157 * gran_size: 1M, 2M, ..., 2G
1158 * chunk size: gran_size, ..., 4G
1159 * so we need (2+13)*6
1160 */
1161#define NUM_RESULT 90
1162#define PSHIFT (PAGE_SHIFT - 10)
1163
1164static struct mtrr_cleanup_result __initdata result[NUM_RESULT];
1165static struct res_range __initdata range_new[RANGE_NUM];
1166static unsigned long __initdata min_loss_pfn[RANGE_NUM];
1167
1168static int __init mtrr_cleanup(unsigned address_bits)
1169{
1170 unsigned long extra_remove_base, extra_remove_size;
1171 unsigned long i, base, size, def, dummy;
1172 mtrr_type type;
1173 int nr_range, nr_range_new;
1174 u64 chunk_size, gran_size;
1175 unsigned long range_sums, range_sums_new;
1176 int index_good;
1177 int num_reg_good;
1178
1179 /* one extra slot for all-zero (unused) entries */
1180 int num[MTRR_NUM_TYPES + 1];
1181
1182 if (!is_cpu(INTEL) || enable_mtrr_cleanup < 1)
1183 return 0;
1184 rdmsr(MTRRdefType_MSR, def, dummy);
1185 def &= 0xff;
1186 if (def != MTRR_TYPE_UNCACHABLE)
1187 return 0;
1188
1189 /* get it and store it aside */
1190 memset(range_state, 0, sizeof(range_state));
1191 for (i = 0; i < num_var_ranges; i++) {
1192 mtrr_if->get(i, &base, &size, &type);
1193 range_state[i].base_pfn = base;
1194 range_state[i].size_pfn = size;
1195 range_state[i].type = type;
1196 }
1197
1198 /* check entries number */
1199 memset(num, 0, sizeof(num));
1200 for (i = 0; i < num_var_ranges; i++) {
1201 type = range_state[i].type;
1202 size = range_state[i].size_pfn;
1203 if (type >= MTRR_NUM_TYPES)
1204 continue;
1205 if (!size)
1206 type = MTRR_NUM_TYPES;
1207 num[type]++;
1208 }
1209
1210 /* check if we got UC entries */
1211 if (!num[MTRR_TYPE_UNCACHABLE])
1212 return 0;
1213
1214 /* check if we only had WB and UC */
1215 if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] !=
1216 num_var_ranges - num[MTRR_NUM_TYPES])
1217 return 0;
1218
1219 memset(range, 0, sizeof(range));
1220 extra_remove_size = 0;
1221 if (mtrr_tom2) {
1222 extra_remove_base = 1 << (32 - PAGE_SHIFT);
1223 extra_remove_size =
1224 (mtrr_tom2 >> PAGE_SHIFT) - extra_remove_base;
1225 }
1226 nr_range = x86_get_mtrr_mem_range(range, 0, extra_remove_base,
1227 extra_remove_size);
1228 range_sums = sum_ranges(range, nr_range);
1229 printk(KERN_INFO "total RAM covered: %ldM\n",
1230 range_sums >> (20 - PAGE_SHIFT));
1231
1232 if (mtrr_chunk_size && mtrr_gran_size) {
1233 int num_reg;
1234
1235 debug_print = 1;
1236 /* convert ranges to var ranges state */
1237 num_reg = x86_setup_var_mtrrs(range, nr_range, mtrr_chunk_size,
1238 mtrr_gran_size);
1239
1240 /* we got new setting in range_state, check it */
1241 memset(range_new, 0, sizeof(range_new));
1242 nr_range_new = x86_get_mtrr_mem_range(range_new, 0,
1243 extra_remove_base,
1244 extra_remove_size);
1245 range_sums_new = sum_ranges(range_new, nr_range_new);
1246
1247 i = 0;
1248 result[i].chunk_sizek = mtrr_chunk_size >> 10;
1249 result[i].gran_sizek = mtrr_gran_size >> 10;
1250 result[i].num_reg = num_reg;
1251 if (range_sums < range_sums_new) {
1252 result[i].lose_cover_sizek =
1253 (range_sums_new - range_sums) << PSHIFT;
1254 result[i].bad = 1;
1255 } else
1256 result[i].lose_cover_sizek =
1257 (range_sums - range_sums_new) << PSHIFT;
1258
1259 printk(KERN_INFO "%sgran_size: %ldM \tchunk_size: %ldM \t",
1260 result[i].bad?"*BAD*":" ", result[i].gran_sizek >> 10,
1261 result[i].chunk_sizek >> 10);
1262 printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ldM\n",
1263 result[i].num_reg, result[i].bad?"-":"",
1264 result[i].lose_cover_sizek >> 10);
1265 if (!result[i].bad) {
1266 set_var_mtrr_all(address_bits);
1267 return 1;
1268 }
1269 printk(KERN_INFO "invalid mtrr_gran_size or mtrr_chunk_size, "
1270 "will find optimal one\n");
1271 debug_print = 0;
1272 memset(result, 0, sizeof(result[0]));
1273 }
1274
1275 i = 0;
1276 memset(min_loss_pfn, 0xff, sizeof(min_loss_pfn));
1277 memset(result, 0, sizeof(result));
1278 for (gran_size = (1ULL<<20); gran_size < (1ULL<<32); gran_size <<= 1) {
1279 for (chunk_size = gran_size; chunk_size < (1ULL<<33);
1280 chunk_size <<= 1) {
1281 int num_reg;
1282
1283 if (debug_print)
1284 printk(KERN_INFO
1285 "\ngran_size: %lldM chunk_size: %lldM\n",
1286 gran_size >> 20, chunk_size >> 20);
1287 if (i >= NUM_RESULT)
1288 continue;
1289
1290 /* convert ranges to var ranges state */
1291 num_reg = x86_setup_var_mtrrs(range, nr_range,
1292 chunk_size, gran_size);
1293
1294 /* we got new setting in range_state, check it */
1295 memset(range_new, 0, sizeof(range_new));
1296 nr_range_new = x86_get_mtrr_mem_range(range_new, 0,
1297 extra_remove_base, extra_remove_size);
1298 range_sums_new = sum_ranges(range_new, nr_range_new);
1299
1300 result[i].chunk_sizek = chunk_size >> 10;
1301 result[i].gran_sizek = gran_size >> 10;
1302 result[i].num_reg = num_reg;
1303 if (range_sums < range_sums_new) {
1304 result[i].lose_cover_sizek =
1305 (range_sums_new - range_sums) << PSHIFT;
1306 result[i].bad = 1;
1307 } else
1308 result[i].lose_cover_sizek =
1309 (range_sums - range_sums_new) << PSHIFT;
1310
1311 /* double check it */
1312 if (!result[i].bad && !result[i].lose_cover_sizek) {
1313 if (nr_range_new != nr_range ||
1314 memcmp(range, range_new, sizeof(range)))
1315 result[i].bad = 1;
1316 }
1317
1318 if (!result[i].bad && (range_sums - range_sums_new <
1319 min_loss_pfn[num_reg])) {
1320 min_loss_pfn[num_reg] =
1321 range_sums - range_sums_new;
1322 }
1323 i++;
1324 }
1325 }
1326
1327 /* print out all */
1328 for (i = 0; i < NUM_RESULT; i++) {
1329 printk(KERN_INFO "%sgran_size: %ldM \tchunk_size: %ldM \t",
1330 result[i].bad?"*BAD* ":" ", result[i].gran_sizek >> 10,
1331 result[i].chunk_sizek >> 10);
1332 printk(KERN_CONT "num_reg: %d \tlose RAM: %s%ldM\n",
1333 result[i].num_reg, result[i].bad?"-":"",
1334 result[i].lose_cover_sizek >> 10);
1335 }
1336
1337 /* try to find the optimal index */
1338 if (nr_mtrr_spare_reg >= num_var_ranges)
1339 nr_mtrr_spare_reg = num_var_ranges - 1;
1340 num_reg_good = -1;
1341 for (i = num_var_ranges - nr_mtrr_spare_reg; i > 0; i--) {
1342 if (!min_loss_pfn[i]) {
1343 num_reg_good = i;
1344 break;
1345 }
1346 }
1347
1348 index_good = -1;
1349 if (num_reg_good != -1) {
1350 for (i = 0; i < NUM_RESULT; i++) {
1351 if (!result[i].bad &&
1352 result[i].num_reg == num_reg_good &&
1353 !result[i].lose_cover_sizek) {
1354 index_good = i;
1355 break;
1356 }
1357 }
1358 }
1359
1360 if (index_good != -1) {
1361 printk(KERN_INFO "Found optimal setting for mtrr cleanup\n");
1362 i = index_good;
1363 printk(KERN_INFO "gran_size: %ldM \tchunk_size: %ldM \t",
1364 result[i].gran_sizek >> 10,
1365 result[i].chunk_sizek >> 10);
1366 printk(KERN_CONT "num_reg: %d \tlose RAM: %ldM\n",
1367 result[i].num_reg,
1368 result[i].lose_cover_sizek >> 10);
1369 /* convert ranges to var ranges state */
1370 chunk_size = result[i].chunk_sizek;
1371 chunk_size <<= 10;
1372 gran_size = result[i].gran_sizek;
1373 gran_size <<= 10;
1374 debug_print = 1;
1375 x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size);
1376 set_var_mtrr_all(address_bits);
1377 return 1;
1378 }
1379
1380 printk(KERN_INFO "mtrr_cleanup: cannot find an optimal value\n");
1381 printk(KERN_INFO "please specify mtrr_gran_size/mtrr_chunk_size\n");
1382
1383 return 0;
1384}
1385#else
1386static int __init mtrr_cleanup(unsigned address_bits)
1387{
1388 return 0;
1389}
1390#endif
1391
1392static int __initdata changed_by_mtrr_cleanup;
1393
612static int disable_mtrr_trim; 1394static int disable_mtrr_trim;
613 1395
614static int __init disable_mtrr_trim_setup(char *str) 1396static int __init disable_mtrr_trim_setup(char *str)
@@ -648,6 +1430,19 @@ int __init amd_special_default_mtrr(void)
648 return 0; 1430 return 0;
649} 1431}
650 1432
1433static u64 __init real_trim_memory(unsigned long start_pfn,
1434 unsigned long limit_pfn)
1435{
1436 u64 trim_start, trim_size;
1437 trim_start = start_pfn;
1438 trim_start <<= PAGE_SHIFT;
1439 trim_size = limit_pfn;
1440 trim_size <<= PAGE_SHIFT;
1441 trim_size -= trim_start;
1442
1443 return e820_update_range(trim_start, trim_size, E820_RAM,
1444 E820_RESERVED);
1445}
651/** 1446/**
652 * mtrr_trim_uncached_memory - trim RAM not covered by MTRRs 1447 * mtrr_trim_uncached_memory - trim RAM not covered by MTRRs
653 * @end_pfn: ending page frame number 1448 * @end_pfn: ending page frame number
@@ -663,8 +1458,11 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
663{ 1458{
664 unsigned long i, base, size, highest_pfn = 0, def, dummy; 1459 unsigned long i, base, size, highest_pfn = 0, def, dummy;
665 mtrr_type type; 1460 mtrr_type type;
666 u64 trim_start, trim_size; 1461 int nr_range;
1462 u64 total_trim_size;
667 1463
1464 /* one extra slot for all-zero (unused) entries */
1465 int num[MTRR_NUM_TYPES + 1];
668 /* 1466 /*
669 * Make sure we only trim uncachable memory on machines that 1467 * Make sure we only trim uncachable memory on machines that
670 * support the Intel MTRR architecture: 1468 * support the Intel MTRR architecture:
@@ -676,14 +1474,22 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
676 if (def != MTRR_TYPE_UNCACHABLE) 1474 if (def != MTRR_TYPE_UNCACHABLE)
677 return 0; 1475 return 0;
678 1476
679 if (amd_special_default_mtrr()) 1477 /* get it and store it aside */
680 return 0; 1478 memset(range_state, 0, sizeof(range_state));
1479 for (i = 0; i < num_var_ranges; i++) {
1480 mtrr_if->get(i, &base, &size, &type);
1481 range_state[i].base_pfn = base;
1482 range_state[i].size_pfn = size;
1483 range_state[i].type = type;
1484 }
681 1485
682 /* Find highest cached pfn */ 1486 /* Find highest cached pfn */
683 for (i = 0; i < num_var_ranges; i++) { 1487 for (i = 0; i < num_var_ranges; i++) {
684 mtrr_if->get(i, &base, &size, &type); 1488 type = range_state[i].type;
685 if (type != MTRR_TYPE_WRBACK) 1489 if (type != MTRR_TYPE_WRBACK)
686 continue; 1490 continue;
1491 base = range_state[i].base_pfn;
1492 size = range_state[i].size_pfn;
687 if (highest_pfn < base + size) 1493 if (highest_pfn < base + size)
688 highest_pfn = base + size; 1494 highest_pfn = base + size;
689 } 1495 }
@@ -698,22 +1504,65 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
698 return 0; 1504 return 0;
699 } 1505 }
700 1506
701 if (highest_pfn < end_pfn) { 1507 /* check entries number */
1508 memset(num, 0, sizeof(num));
1509 for (i = 0; i < num_var_ranges; i++) {
1510 type = range_state[i].type;
1511 if (type >= MTRR_NUM_TYPES)
1512 continue;
1513 size = range_state[i].size_pfn;
1514 if (!size)
1515 type = MTRR_NUM_TYPES;
1516 num[type]++;
1517 }
1518
1519 /* no entry for WB? */
1520 if (!num[MTRR_TYPE_WRBACK])
1521 return 0;
1522
1523 /* check if we only had WB and UC */
1524 if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] !=
1525 num_var_ranges - num[MTRR_NUM_TYPES])
1526 return 0;
1527
1528 memset(range, 0, sizeof(range));
1529 nr_range = 0;
1530 if (mtrr_tom2) {
1531 range[nr_range].start = (1ULL<<(32 - PAGE_SHIFT));
1532 range[nr_range].end = (mtrr_tom2 >> PAGE_SHIFT) - 1;
1533 if (highest_pfn < range[nr_range].end + 1)
1534 highest_pfn = range[nr_range].end + 1;
1535 nr_range++;
1536 }
1537 nr_range = x86_get_mtrr_mem_range(range, nr_range, 0, 0);
1538
1539 total_trim_size = 0;
1540 /* check the head */
1541 if (range[0].start)
1542 total_trim_size += real_trim_memory(0, range[0].start);
1543 /* check the holes */
1544 for (i = 0; i < nr_range - 1; i++) {
1545 if (range[i].end + 1 < range[i+1].start)
1546 total_trim_size += real_trim_memory(range[i].end + 1,
1547 range[i+1].start);
1548 }
1549 /* check the top */
1550 i = nr_range - 1;
1551 if (range[i].end + 1 < end_pfn)
1552 total_trim_size += real_trim_memory(range[i].end + 1,
1553 end_pfn);
1554
1555 if (total_trim_size) {
702 printk(KERN_WARNING "WARNING: BIOS bug: CPU MTRRs don't cover" 1556 printk(KERN_WARNING "WARNING: BIOS bug: CPU MTRRs don't cover"
703 " all of memory, losing %luMB of RAM.\n", 1557 " all of memory, losing %lluMB of RAM.\n",
704 (end_pfn - highest_pfn) >> (20 - PAGE_SHIFT)); 1558 total_trim_size >> 20);
705 1559
706 WARN_ON(1); 1560 if (!changed_by_mtrr_cleanup)
1561 WARN_ON(1);
707 1562
708 printk(KERN_INFO "update e820 for mtrr\n"); 1563 printk(KERN_INFO "update e820 for mtrr\n");
709 trim_start = highest_pfn;
710 trim_start <<= PAGE_SHIFT;
711 trim_size = end_pfn;
712 trim_size <<= PAGE_SHIFT;
713 trim_size -= trim_start;
714 update_memory_range(trim_start, trim_size, E820_RAM,
715 E820_RESERVED);
716 update_e820(); 1564 update_e820();
1565
717 return 1; 1566 return 1;
718 } 1567 }
719 1568
@@ -729,18 +1578,21 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
729 */ 1578 */
730void __init mtrr_bp_init(void) 1579void __init mtrr_bp_init(void)
731{ 1580{
1581 u32 phys_addr;
732 init_ifs(); 1582 init_ifs();
733 1583
1584 phys_addr = 32;
1585
734 if (cpu_has_mtrr) { 1586 if (cpu_has_mtrr) {
735 mtrr_if = &generic_mtrr_ops; 1587 mtrr_if = &generic_mtrr_ops;
736 size_or_mask = 0xff000000; /* 36 bits */ 1588 size_or_mask = 0xff000000; /* 36 bits */
737 size_and_mask = 0x00f00000; 1589 size_and_mask = 0x00f00000;
1590 phys_addr = 36;
738 1591
739 /* This is an AMD specific MSR, but we assume(hope?) that 1592 /* This is an AMD specific MSR, but we assume(hope?) that
740 Intel will implement it too when they extend the address 1593 Intel will implement it too when they extend the address
741 bus of the Xeon. */ 1594 bus of the Xeon. */
742 if (cpuid_eax(0x80000000) >= 0x80000008) { 1595 if (cpuid_eax(0x80000000) >= 0x80000008) {
743 u32 phys_addr;
744 phys_addr = cpuid_eax(0x80000008) & 0xff; 1596 phys_addr = cpuid_eax(0x80000008) & 0xff;
745 /* CPUID workaround for Intel 0F33/0F34 CPU */ 1597 /* CPUID workaround for Intel 0F33/0F34 CPU */
746 if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && 1598 if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
@@ -758,6 +1610,7 @@ void __init mtrr_bp_init(void)
758 don't support PAE */ 1610 don't support PAE */
759 size_or_mask = 0xfff00000; /* 32 bits */ 1611 size_or_mask = 0xfff00000; /* 32 bits */
760 size_and_mask = 0; 1612 size_and_mask = 0;
1613 phys_addr = 32;
761 } 1614 }
762 } else { 1615 } else {
763 switch (boot_cpu_data.x86_vendor) { 1616 switch (boot_cpu_data.x86_vendor) {
@@ -791,8 +1644,15 @@ void __init mtrr_bp_init(void)
791 if (mtrr_if) { 1644 if (mtrr_if) {
792 set_num_var_ranges(); 1645 set_num_var_ranges();
793 init_table(); 1646 init_table();
794 if (use_intel()) 1647 if (use_intel()) {
795 get_mtrr_state(); 1648 get_mtrr_state();
1649
1650 if (mtrr_cleanup(phys_addr)) {
1651 changed_by_mtrr_cleanup = 1;
1652 mtrr_if->set_all();
1653 }
1654
1655 }
796 } 1656 }
797} 1657}
798 1658
@@ -829,9 +1689,10 @@ static int __init mtrr_init_finialize(void)
829{ 1689{
830 if (!mtrr_if) 1690 if (!mtrr_if)
831 return 0; 1691 return 0;
832 if (use_intel()) 1692 if (use_intel()) {
833 mtrr_state_warn(); 1693 if (!changed_by_mtrr_cleanup)
834 else { 1694 mtrr_state_warn();
1695 } else {
835 /* The CPUs don't have MTRRs and seem not to support SMP. They have 1696 /* The CPUs don't have MTRRs and seem not to support SMP. They have
836 * specific drivers; we use a tricky method to support 1697 * specific drivers; we use a tricky method to support
837 * suspend/resume for them. 1698 * suspend/resume for them.
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h
index 2cc77eb6fea3..2dc4ec656b23 100644
--- a/arch/x86/kernel/cpu/mtrr/mtrr.h
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.h
@@ -81,6 +81,8 @@ void set_mtrr_done(struct set_mtrr_context *ctxt);
81void set_mtrr_cache_disable(struct set_mtrr_context *ctxt); 81void set_mtrr_cache_disable(struct set_mtrr_context *ctxt);
82void set_mtrr_prepare_save(struct set_mtrr_context *ctxt); 82void set_mtrr_prepare_save(struct set_mtrr_context *ctxt);
83 83
84void fill_mtrr_var_range(unsigned int index,
85 u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi);
84void get_mtrr_state(void); 86void get_mtrr_state(void);
85 87
86extern void set_mtrr_ops(struct mtrr_ops * ops); 88extern void set_mtrr_ops(struct mtrr_ops * ops);
@@ -92,6 +94,7 @@ extern struct mtrr_ops * mtrr_if;
92#define use_intel() (mtrr_if && mtrr_if->use_intel_if == 1) 94#define use_intel() (mtrr_if && mtrr_if->use_intel_if == 1)
93 95
94extern unsigned int num_var_ranges; 96extern unsigned int num_var_ranges;
97extern u64 mtrr_tom2;
95 98
96void mtrr_state_warn(void); 99void mtrr_state_warn(void);
97const char *mtrr_attrib_to_str(int x); 100const char *mtrr_attrib_to_str(int x);
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index f9ae93adffe5..2e9bef6e3aa3 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -1,11 +1,15 @@
1/* local apic based NMI watchdog for various CPUs. 1/*
2 This file also handles reservation of performance counters for coordination 2 * local apic based NMI watchdog for various CPUs.
3 with other users (like oprofile). 3 *
4 4 * This file also handles reservation of performance counters for coordination
5 Note that these events normally don't tick when the CPU idles. This means 5 * with other users (like oprofile).
6 the frequency varies with CPU load. 6 *
7 7 * Note that these events normally don't tick when the CPU idles. This means
8 Original code for K7/P6 written by Keith Owens */ 8 * the frequency varies with CPU load.
9 *
10 * Original code for K7/P6 written by Keith Owens
11 *
12 */
9 13
10#include <linux/percpu.h> 14#include <linux/percpu.h>
11#include <linux/module.h> 15#include <linux/module.h>
@@ -36,12 +40,16 @@ struct wd_ops {
36 40
37static const struct wd_ops *wd_ops; 41static const struct wd_ops *wd_ops;
38 42
39/* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's 43/*
40 * offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now) 44 * this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's
45 * offset from MSR_P4_BSU_ESCR0.
46 *
47 * It will be the max for all platforms (for now)
41 */ 48 */
42#define NMI_MAX_COUNTER_BITS 66 49#define NMI_MAX_COUNTER_BITS 66
43 50
44/* perfctr_nmi_owner tracks the ownership of the perfctr registers: 51/*
52 * perfctr_nmi_owner tracks the ownership of the perfctr registers:
45 * evtsel_nmi_owner tracks the ownership of the event selection 53 * evtsel_nmi_owner tracks the ownership of the event selection
46 * - different performance counters/ event selection may be reserved for 54 * - different performance counters/ event selection may be reserved for
47 * different subsystems this reservation system just tries to coordinate 55 * different subsystems this reservation system just tries to coordinate
@@ -73,8 +81,10 @@ static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
73 return 0; 81 return 0;
74} 82}
75 83
76/* converts an msr to an appropriate reservation bit */ 84/*
77/* returns the bit offset of the event selection register */ 85 * converts an msr to an appropriate reservation bit
86 * returns the bit offset of the event selection register
87 */
78static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr) 88static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
79{ 89{
80 /* returns the bit offset of the event selection register */ 90 /* returns the bit offset of the event selection register */
@@ -114,6 +124,7 @@ int avail_to_resrv_perfctr_nmi(unsigned int msr)
114 124
115 return (!test_bit(counter, perfctr_nmi_owner)); 125 return (!test_bit(counter, perfctr_nmi_owner));
116} 126}
127EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit);
117 128
118int reserve_perfctr_nmi(unsigned int msr) 129int reserve_perfctr_nmi(unsigned int msr)
119{ 130{
@@ -128,6 +139,7 @@ int reserve_perfctr_nmi(unsigned int msr)
128 return 1; 139 return 1;
129 return 0; 140 return 0;
130} 141}
142EXPORT_SYMBOL(reserve_perfctr_nmi);
131 143
132void release_perfctr_nmi(unsigned int msr) 144void release_perfctr_nmi(unsigned int msr)
133{ 145{
@@ -140,6 +152,7 @@ void release_perfctr_nmi(unsigned int msr)
140 152
141 clear_bit(counter, perfctr_nmi_owner); 153 clear_bit(counter, perfctr_nmi_owner);
142} 154}
155EXPORT_SYMBOL(release_perfctr_nmi);
143 156
144int reserve_evntsel_nmi(unsigned int msr) 157int reserve_evntsel_nmi(unsigned int msr)
145{ 158{
@@ -154,6 +167,7 @@ int reserve_evntsel_nmi(unsigned int msr)
154 return 1; 167 return 1;
155 return 0; 168 return 0;
156} 169}
170EXPORT_SYMBOL(reserve_evntsel_nmi);
157 171
158void release_evntsel_nmi(unsigned int msr) 172void release_evntsel_nmi(unsigned int msr)
159{ 173{
@@ -166,11 +180,6 @@ void release_evntsel_nmi(unsigned int msr)
166 180
167 clear_bit(counter, evntsel_nmi_owner); 181 clear_bit(counter, evntsel_nmi_owner);
168} 182}
169
170EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit);
171EXPORT_SYMBOL(reserve_perfctr_nmi);
172EXPORT_SYMBOL(release_perfctr_nmi);
173EXPORT_SYMBOL(reserve_evntsel_nmi);
174EXPORT_SYMBOL(release_evntsel_nmi); 183EXPORT_SYMBOL(release_evntsel_nmi);
175 184
176void disable_lapic_nmi_watchdog(void) 185void disable_lapic_nmi_watchdog(void)
@@ -181,7 +190,9 @@ void disable_lapic_nmi_watchdog(void)
181 return; 190 return;
182 191
183 on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1); 192 on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1);
184 wd_ops->unreserve(); 193
194 if (wd_ops)
195 wd_ops->unreserve();
185 196
186 BUG_ON(atomic_read(&nmi_active) != 0); 197 BUG_ON(atomic_read(&nmi_active) != 0);
187} 198}
@@ -232,8 +243,8 @@ static unsigned int adjust_for_32bit_ctr(unsigned int hz)
232 return retval; 243 return retval;
233} 244}
234 245
235static void 246static void write_watchdog_counter(unsigned int perfctr_msr,
236write_watchdog_counter(unsigned int perfctr_msr, const char *descr, unsigned nmi_hz) 247 const char *descr, unsigned nmi_hz)
237{ 248{
238 u64 count = (u64)cpu_khz * 1000; 249 u64 count = (u64)cpu_khz * 1000;
239 250
@@ -244,7 +255,7 @@ write_watchdog_counter(unsigned int perfctr_msr, const char *descr, unsigned nmi
244} 255}
245 256
246static void write_watchdog_counter32(unsigned int perfctr_msr, 257static void write_watchdog_counter32(unsigned int perfctr_msr,
247 const char *descr, unsigned nmi_hz) 258 const char *descr, unsigned nmi_hz)
248{ 259{
249 u64 count = (u64)cpu_khz * 1000; 260 u64 count = (u64)cpu_khz * 1000;
250 261
@@ -254,9 +265,10 @@ static void write_watchdog_counter32(unsigned int perfctr_msr,
254 wrmsr(perfctr_msr, (u32)(-count), 0); 265 wrmsr(perfctr_msr, (u32)(-count), 0);
255} 266}
256 267
257/* AMD K7/K8/Family10h/Family11h support. AMD keeps this interface 268/*
258 nicely stable so there is not much variety */ 269 * AMD K7/K8/Family10h/Family11h support.
259 270 * AMD keeps this interface nicely stable so there is not much variety
271 */
260#define K7_EVNTSEL_ENABLE (1 << 22) 272#define K7_EVNTSEL_ENABLE (1 << 22)
261#define K7_EVNTSEL_INT (1 << 20) 273#define K7_EVNTSEL_INT (1 << 20)
262#define K7_EVNTSEL_OS (1 << 17) 274#define K7_EVNTSEL_OS (1 << 17)
@@ -289,7 +301,7 @@ static int setup_k7_watchdog(unsigned nmi_hz)
289 301
290 wd->perfctr_msr = perfctr_msr; 302 wd->perfctr_msr = perfctr_msr;
291 wd->evntsel_msr = evntsel_msr; 303 wd->evntsel_msr = evntsel_msr;
292 wd->cccr_msr = 0; //unused 304 wd->cccr_msr = 0; /* unused */
293 return 1; 305 return 1;
294} 306}
295 307
@@ -325,18 +337,19 @@ static void single_msr_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
325} 337}
326 338
327static const struct wd_ops k7_wd_ops = { 339static const struct wd_ops k7_wd_ops = {
328 .reserve = single_msr_reserve, 340 .reserve = single_msr_reserve,
329 .unreserve = single_msr_unreserve, 341 .unreserve = single_msr_unreserve,
330 .setup = setup_k7_watchdog, 342 .setup = setup_k7_watchdog,
331 .rearm = single_msr_rearm, 343 .rearm = single_msr_rearm,
332 .stop = single_msr_stop_watchdog, 344 .stop = single_msr_stop_watchdog,
333 .perfctr = MSR_K7_PERFCTR0, 345 .perfctr = MSR_K7_PERFCTR0,
334 .evntsel = MSR_K7_EVNTSEL0, 346 .evntsel = MSR_K7_EVNTSEL0,
335 .checkbit = 1ULL<<47, 347 .checkbit = 1ULL << 47,
336}; 348};
337 349
338/* Intel Model 6 (PPro+,P2,P3,P-M,Core1) */ 350/*
339 351 * Intel Model 6 (PPro+,P2,P3,P-M,Core1)
352 */
340#define P6_EVNTSEL0_ENABLE (1 << 22) 353#define P6_EVNTSEL0_ENABLE (1 << 22)
341#define P6_EVNTSEL_INT (1 << 20) 354#define P6_EVNTSEL_INT (1 << 20)
342#define P6_EVNTSEL_OS (1 << 17) 355#define P6_EVNTSEL_OS (1 << 17)
@@ -372,52 +385,58 @@ static int setup_p6_watchdog(unsigned nmi_hz)
372 385
373 wd->perfctr_msr = perfctr_msr; 386 wd->perfctr_msr = perfctr_msr;
374 wd->evntsel_msr = evntsel_msr; 387 wd->evntsel_msr = evntsel_msr;
375 wd->cccr_msr = 0; //unused 388 wd->cccr_msr = 0; /* unused */
376 return 1; 389 return 1;
377} 390}
378 391
379static void p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) 392static void p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
380{ 393{
381 /* P6 based Pentium M need to re-unmask 394 /*
395 * P6 based Pentium M needs to re-unmask
382 * the apic vector but it doesn't hurt 396 * the apic vector but it doesn't hurt
383 * other P6 variant. 397 * other P6 variants.
384 * ArchPerfom/Core Duo also needs this */ 398 * ArchPerfmon/Core Duo also needs this
399 */
385 apic_write(APIC_LVTPC, APIC_DM_NMI); 400 apic_write(APIC_LVTPC, APIC_DM_NMI);
401
386 /* P6/ARCH_PERFMON has 32 bit counter write */ 402 /* P6/ARCH_PERFMON has 32 bit counter write */
387 write_watchdog_counter32(wd->perfctr_msr, NULL,nmi_hz); 403 write_watchdog_counter32(wd->perfctr_msr, NULL, nmi_hz);
388} 404}
389 405
390static const struct wd_ops p6_wd_ops = { 406static const struct wd_ops p6_wd_ops = {
391 .reserve = single_msr_reserve, 407 .reserve = single_msr_reserve,
392 .unreserve = single_msr_unreserve, 408 .unreserve = single_msr_unreserve,
393 .setup = setup_p6_watchdog, 409 .setup = setup_p6_watchdog,
394 .rearm = p6_rearm, 410 .rearm = p6_rearm,
395 .stop = single_msr_stop_watchdog, 411 .stop = single_msr_stop_watchdog,
396 .perfctr = MSR_P6_PERFCTR0, 412 .perfctr = MSR_P6_PERFCTR0,
397 .evntsel = MSR_P6_EVNTSEL0, 413 .evntsel = MSR_P6_EVNTSEL0,
398 .checkbit = 1ULL<<39, 414 .checkbit = 1ULL << 39,
399}; 415};
400 416
401/* Intel P4 performance counters. By far the most complicated of all. */ 417/*
402 418 * Intel P4 performance counters.
403#define MSR_P4_MISC_ENABLE_PERF_AVAIL (1<<7) 419 * By far the most complicated of all.
404#define P4_ESCR_EVENT_SELECT(N) ((N)<<25) 420 */
405#define P4_ESCR_OS (1<<3) 421#define MSR_P4_MISC_ENABLE_PERF_AVAIL (1 << 7)
406#define P4_ESCR_USR (1<<2) 422#define P4_ESCR_EVENT_SELECT(N) ((N) << 25)
407#define P4_CCCR_OVF_PMI0 (1<<26) 423#define P4_ESCR_OS (1 << 3)
408#define P4_CCCR_OVF_PMI1 (1<<27) 424#define P4_ESCR_USR (1 << 2)
409#define P4_CCCR_THRESHOLD(N) ((N)<<20) 425#define P4_CCCR_OVF_PMI0 (1 << 26)
410#define P4_CCCR_COMPLEMENT (1<<19) 426#define P4_CCCR_OVF_PMI1 (1 << 27)
411#define P4_CCCR_COMPARE (1<<18) 427#define P4_CCCR_THRESHOLD(N) ((N) << 20)
412#define P4_CCCR_REQUIRED (3<<16) 428#define P4_CCCR_COMPLEMENT (1 << 19)
413#define P4_CCCR_ESCR_SELECT(N) ((N)<<13) 429#define P4_CCCR_COMPARE (1 << 18)
414#define P4_CCCR_ENABLE (1<<12) 430#define P4_CCCR_REQUIRED (3 << 16)
415#define P4_CCCR_OVF (1<<31) 431#define P4_CCCR_ESCR_SELECT(N) ((N) << 13)
416 432#define P4_CCCR_ENABLE (1 << 12)
417/* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter 433#define P4_CCCR_OVF (1 << 31)
418 CRU_ESCR0 (with any non-null event selector) through a complemented
419 max threshold. [IA32-Vol3, Section 14.9.9] */
420 434
435/*
436 * Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
437 * CRU_ESCR0 (with any non-null event selector) through a complemented
438 * max threshold. [IA32-Vol3, Section 14.9.9]
439 */
421static int setup_p4_watchdog(unsigned nmi_hz) 440static int setup_p4_watchdog(unsigned nmi_hz)
422{ 441{
423 unsigned int perfctr_msr, evntsel_msr, cccr_msr; 442 unsigned int perfctr_msr, evntsel_msr, cccr_msr;
@@ -442,7 +461,8 @@ static int setup_p4_watchdog(unsigned nmi_hz)
442#endif 461#endif
443 ht_num = 0; 462 ht_num = 0;
444 463
445 /* performance counters are shared resources 464 /*
465 * performance counters are shared resources
446 * assign each hyperthread its own set 466 * assign each hyperthread its own set
447 * (re-use the ESCR0 register, seems safe 467 * (re-use the ESCR0 register, seems safe
448 * and keeps the cccr_val the same) 468 * and keeps the cccr_val the same)
@@ -540,20 +560,21 @@ static void p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
540} 560}
541 561
542static const struct wd_ops p4_wd_ops = { 562static const struct wd_ops p4_wd_ops = {
543 .reserve = p4_reserve, 563 .reserve = p4_reserve,
544 .unreserve = p4_unreserve, 564 .unreserve = p4_unreserve,
545 .setup = setup_p4_watchdog, 565 .setup = setup_p4_watchdog,
546 .rearm = p4_rearm, 566 .rearm = p4_rearm,
547 .stop = stop_p4_watchdog, 567 .stop = stop_p4_watchdog,
548 /* RED-PEN this is wrong for the other sibling */ 568 /* RED-PEN this is wrong for the other sibling */
549 .perfctr = MSR_P4_BPU_PERFCTR0, 569 .perfctr = MSR_P4_BPU_PERFCTR0,
550 .evntsel = MSR_P4_BSU_ESCR0, 570 .evntsel = MSR_P4_BSU_ESCR0,
551 .checkbit = 1ULL<<39, 571 .checkbit = 1ULL << 39,
552}; 572};
553 573
554/* Watchdog using the Intel architected PerfMon. Used for Core2 and hopefully 574/*
555 all future Intel CPUs. */ 575 * Watchdog using the Intel architected PerfMon.
556 576 * Used for Core2 and hopefully all future Intel CPUs.
577 */
557#define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 578#define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
558#define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK 579#define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK
559 580
@@ -599,19 +620,19 @@ static int setup_intel_arch_watchdog(unsigned nmi_hz)
599 620
600 wd->perfctr_msr = perfctr_msr; 621 wd->perfctr_msr = perfctr_msr;
601 wd->evntsel_msr = evntsel_msr; 622 wd->evntsel_msr = evntsel_msr;
602 wd->cccr_msr = 0; //unused 623 wd->cccr_msr = 0; /* unused */
603 intel_arch_wd_ops.checkbit = 1ULL << (eax.split.bit_width - 1); 624 intel_arch_wd_ops.checkbit = 1ULL << (eax.split.bit_width - 1);
604 return 1; 625 return 1;
605} 626}
606 627
607static struct wd_ops intel_arch_wd_ops __read_mostly = { 628static struct wd_ops intel_arch_wd_ops __read_mostly = {
608 .reserve = single_msr_reserve, 629 .reserve = single_msr_reserve,
609 .unreserve = single_msr_unreserve, 630 .unreserve = single_msr_unreserve,
610 .setup = setup_intel_arch_watchdog, 631 .setup = setup_intel_arch_watchdog,
611 .rearm = p6_rearm, 632 .rearm = p6_rearm,
612 .stop = single_msr_stop_watchdog, 633 .stop = single_msr_stop_watchdog,
613 .perfctr = MSR_ARCH_PERFMON_PERFCTR1, 634 .perfctr = MSR_ARCH_PERFMON_PERFCTR1,
614 .evntsel = MSR_ARCH_PERFMON_EVENTSEL1, 635 .evntsel = MSR_ARCH_PERFMON_EVENTSEL1,
615}; 636};
616 637
617static void probe_nmi_watchdog(void) 638static void probe_nmi_watchdog(void)
@@ -624,8 +645,10 @@ static void probe_nmi_watchdog(void)
624 wd_ops = &k7_wd_ops; 645 wd_ops = &k7_wd_ops;
625 break; 646 break;
626 case X86_VENDOR_INTEL: 647 case X86_VENDOR_INTEL:
627 /* Work around Core Duo (Yonah) errata AE49 where perfctr1 648 /*
628 doesn't have a working enable bit. */ 649 * Work around Core Duo (Yonah) errata AE49 where perfctr1
650 * doesn't have a working enable bit.
651 */
629 if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 14) { 652 if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 14) {
630 intel_arch_wd_ops.perfctr = MSR_ARCH_PERFMON_PERFCTR0; 653 intel_arch_wd_ops.perfctr = MSR_ARCH_PERFMON_PERFCTR0;
631 intel_arch_wd_ops.evntsel = MSR_ARCH_PERFMON_EVENTSEL0; 654 intel_arch_wd_ops.evntsel = MSR_ARCH_PERFMON_EVENTSEL0;
@@ -636,7 +659,7 @@ static void probe_nmi_watchdog(void)
636 } 659 }
637 switch (boot_cpu_data.x86) { 660 switch (boot_cpu_data.x86) {
638 case 6: 661 case 6:
639 if (boot_cpu_data.x86_model > 0xd) 662 if (boot_cpu_data.x86_model > 13)
640 return; 663 return;
641 664
642 wd_ops = &p6_wd_ops; 665 wd_ops = &p6_wd_ops;
@@ -697,10 +720,11 @@ int lapic_wd_event(unsigned nmi_hz)
697{ 720{
698 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); 721 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
699 u64 ctr; 722 u64 ctr;
723
700 rdmsrl(wd->perfctr_msr, ctr); 724 rdmsrl(wd->perfctr_msr, ctr);
701 if (ctr & wd_ops->checkbit) { /* perfctr still running? */ 725 if (ctr & wd_ops->checkbit) /* perfctr still running? */
702 return 0; 726 return 0;
703 } 727
704 wd_ops->rearm(wd, nmi_hz); 728 wd_ops->rearm(wd, nmi_hz);
705 return 1; 729 return 1;
706} 730}
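The watchdog arithmetic above programs the performance counter with the negated number of cycles in one NMI period, so the counter overflows (and raises the NMI) roughly nmi_hz times per second, assuming a cycle-counting event. A standalone sketch of that computation with illustrative values:

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint64_t cpu_khz = 2000000;		/* assume a 2 GHz CPU */
		unsigned int nmi_hz = 1;
		uint64_t count = cpu_khz * 1000;	/* cycles per second */

		count /= nmi_hz;			/* cycles per NMI period */
		/* the counter counts up, so it is started at -count */
		printf("write %#llx to the perfctr\n",
		       (unsigned long long)(0 - count));
		return 0;
	}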
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
new file mode 100644
index 000000000000..a5383ae2cbe3
--- /dev/null
+++ b/arch/x86/kernel/e820.c
@@ -0,0 +1,1405 @@
1/*
2 * Handle the memory map.
3 * The functions here do the job until bootmem takes over.
4 *
5 * Getting sanitize_e820_map() in sync with i386 version by applying change:
6 * - Provisions for empty E820 memory regions (reported by certain BIOSes).
7 * Alex Achenbach <xela@slit.de>, December 2002.
8 * Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
9 *
10 */
11#include <linux/kernel.h>
12#include <linux/types.h>
13#include <linux/init.h>
14#include <linux/bootmem.h>
15#include <linux/ioport.h>
16#include <linux/string.h>
17#include <linux/kexec.h>
18#include <linux/module.h>
19#include <linux/mm.h>
20#include <linux/pfn.h>
21#include <linux/suspend.h>
22#include <linux/firmware-map.h>
23
24#include <asm/pgtable.h>
25#include <asm/page.h>
26#include <asm/e820.h>
27#include <asm/proto.h>
28#include <asm/setup.h>
29#include <asm/trampoline.h>
30
31/*
32 * The e820 map is the map that gets modified e.g. with command line parameters
33 * and that is also registered with modifications in the kernel resource tree
34 * with the iomem_resource as parent.
35 *
36 * The e820_saved is directly saved after the BIOS-provided memory map is
37 * copied. It doesn't get modified afterwards. It's registered for the
38 * /sys/firmware/memmap interface.
39 *
40 * That memory map is not modified and is used as base for kexec. The kexec'd
41 * kernel should get the same memory map as the firmware provides. Then the
42 * user can e.g. boot the original kernel with mem=1G while still booting the
43 * next kernel with full memory.
44 */
45struct e820map e820;
46struct e820map e820_saved;
47
48/* For PCI or other memory-mapped resources */
49unsigned long pci_mem_start = 0xaeedbabe;
50#ifdef CONFIG_PCI
51EXPORT_SYMBOL(pci_mem_start);
52#endif
53
54/*
55 * This function checks if any part of the range <start,end> is mapped
56 * with type.
57 */
58int
59e820_any_mapped(u64 start, u64 end, unsigned type)
60{
61 int i;
62
63 for (i = 0; i < e820.nr_map; i++) {
64 struct e820entry *ei = &e820.map[i];
65
66 if (type && ei->type != type)
67 continue;
68 if (ei->addr >= end || ei->addr + ei->size <= start)
69 continue;
70 return 1;
71 }
72 return 0;
73}
74EXPORT_SYMBOL_GPL(e820_any_mapped);
75
76/*
77 * This function checks if the entire range <start,end> is mapped with type.
78 *
79 * Note: this function only works correctly if the e820 table is sorted
80 * and non-overlapping, which is the case
81 */
82int __init e820_all_mapped(u64 start, u64 end, unsigned type)
83{
84 int i;
85
86 for (i = 0; i < e820.nr_map; i++) {
87 struct e820entry *ei = &e820.map[i];
88
89 if (type && ei->type != type)
90 continue;
91 /* does the region (partly) overlap the current region? */
92 if (ei->addr >= end || ei->addr + ei->size <= start)
93 continue;
94
95 /* if the region is at the beginning of <start,end> we move
96 * start to the end of the region, since that part is covered
97 */
98 if (ei->addr <= start)
99 start = ei->addr + ei->size;
100 /*
101 * if start is now at or beyond end, we're done, full
102 * coverage
103 */
104 if (start >= end)
105 return 1;
106 }
107 return 0;
108}
109
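A usage sketch for these two helpers (hypothetical caller; the addresses are chosen for illustration only):

	/* Hypothetical checks, not part of this file: */
	if (e820_all_mapped(0xa0000, 0xc0000, E820_RESERVED))
		; /* the whole legacy VGA hole is reserved */
	if (e820_any_mapped(0, 0x100000, E820_RAM))
		; /* at least some of the first 1MB is RAM */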
110/*
111 * Add a memory region to the kernel e820 map.
112 */
113void __init e820_add_region(u64 start, u64 size, int type)
114{
115 int x = e820.nr_map;
116
117 if (x == ARRAY_SIZE(e820.map)) {
118 printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
119 return;
120 }
121
122 e820.map[x].addr = start;
123 e820.map[x].size = size;
124 e820.map[x].type = type;
125 e820.nr_map++;
126}
127
128void __init e820_print_map(char *who)
129{
130 int i;
131
132 for (i = 0; i < e820.nr_map; i++) {
133 printk(KERN_INFO " %s: %016Lx - %016Lx ", who,
134 (unsigned long long) e820.map[i].addr,
135 (unsigned long long)
136 (e820.map[i].addr + e820.map[i].size));
137 switch (e820.map[i].type) {
138 case E820_RAM:
139 case E820_RESERVED_KERN:
140 printk(KERN_CONT "(usable)\n");
141 break;
142 case E820_RESERVED:
143 printk(KERN_CONT "(reserved)\n");
144 break;
145 case E820_ACPI:
146 printk(KERN_CONT "(ACPI data)\n");
147 break;
148 case E820_NVS:
149 printk(KERN_CONT "(ACPI NVS)\n");
150 break;
151 default:
152 printk(KERN_CONT "type %u\n", e820.map[i].type);
153 break;
154 }
155 }
156}
157
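Its output in the kernel log typically looks like this (addresses are illustrative):

	 BIOS-e820: 0000000000000000 - 000000000009fc00 (usable)
	 BIOS-e820: 000000000009fc00 - 00000000000a0000 (reserved)
	 BIOS-e820: 0000000000100000 - 000000003fff0000 (usable)
	 BIOS-e820: 000000003fff0000 - 0000000040000000 (ACPI data)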
158/*
159 * Sanitize the BIOS e820 map.
160 *
161 * Some e820 responses include overlapping entries. The following
162 * replaces the original e820 map with a new one, removing overlaps,
163 * and resolving conflicting memory types in favor of highest
164 * numbered type.
165 *
166 * The input parameter biosmap points to an array of 'struct
167 * e820entry' which on entry has elements in the range [0, *pnr_map)
168 * valid, and which has space for up to max_nr_map entries.
 169 * On return, the resulting sanitized e820 map entries will have been
 170 * written back in the same location, starting at biosmap.
171 *
172 * The integer pointed to by pnr_map must be valid on entry (the
173 * current number of valid entries located at biosmap) and will
174 * be updated on return, with the new number of valid entries
 175 * (at most max_nr_map).
176 *
177 * The return value from sanitize_e820_map() is zero if it
178 * successfully 'sanitized' the map entries passed in, and is -1
 179 * if it did nothing, which can happen if either (1) it was
 180 * passed only one map entry, or (2) any of the input map entries
 181 * were invalid (start + size < start, meaning that the size was
 182 * so big that the described memory range wrapped around through zero).
183 *
184 * Visually we're performing the following
185 * (1,2,3,4 = memory types)...
186 *
187 * Sample memory map (w/overlaps):
188 * ____22__________________
189 * ______________________4_
190 * ____1111________________
191 * _44_____________________
192 * 11111111________________
193 * ____________________33__
194 * ___________44___________
195 * __________33333_________
196 * ______________22________
197 * ___________________2222_
198 * _________111111111______
199 * _____________________11_
200 * _________________4______
201 *
202 * Sanitized equivalent (no overlap):
203 * 1_______________________
204 * _44_____________________
205 * ___1____________________
206 * ____22__________________
207 * ______11________________
208 * _________1______________
209 * __________3_____________
210 * ___________44___________
211 * _____________33_________
212 * _______________2________
213 * ________________1_______
214 * _________________4______
215 * ___________________2____
216 * ____________________33__
217 * ______________________4_
218 */
219
220int __init sanitize_e820_map(struct e820entry *biosmap, int max_nr_map,
221 int *pnr_map)
222{
223 struct change_member {
224 struct e820entry *pbios; /* pointer to original bios entry */
225 unsigned long long addr; /* address for this change point */
226 };
227 static struct change_member change_point_list[2*E820_X_MAX] __initdata;
228 static struct change_member *change_point[2*E820_X_MAX] __initdata;
229 static struct e820entry *overlap_list[E820_X_MAX] __initdata;
230 static struct e820entry new_bios[E820_X_MAX] __initdata;
231 struct change_member *change_tmp;
232 unsigned long current_type, last_type;
233 unsigned long long last_addr;
234 int chgidx, still_changing;
235 int overlap_entries;
236 int new_bios_entry;
237 int old_nr, new_nr, chg_nr;
238 int i;
239
240 /* if there's only one memory region, don't bother */
241 if (*pnr_map < 2)
242 return -1;
243
244 old_nr = *pnr_map;
245 BUG_ON(old_nr > max_nr_map);
246
247 /* bail out if we find any unreasonable addresses in bios map */
248 for (i = 0; i < old_nr; i++)
249 if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr)
250 return -1;
251
252 /* create pointers for initial change-point information (for sorting) */
253 for (i = 0; i < 2 * old_nr; i++)
254 change_point[i] = &change_point_list[i];
255
256 /* record all known change-points (starting and ending addresses),
257 omitting those that are for empty memory regions */
258 chgidx = 0;
259 for (i = 0; i < old_nr; i++) {
260 if (biosmap[i].size != 0) {
261 change_point[chgidx]->addr = biosmap[i].addr;
262 change_point[chgidx++]->pbios = &biosmap[i];
263 change_point[chgidx]->addr = biosmap[i].addr +
264 biosmap[i].size;
265 change_point[chgidx++]->pbios = &biosmap[i];
266 }
267 }
268 chg_nr = chgidx;
269
270 /* sort change-point list by memory addresses (low -> high) */
271 still_changing = 1;
272 while (still_changing) {
273 still_changing = 0;
274 for (i = 1; i < chg_nr; i++) {
275 unsigned long long curaddr, lastaddr;
276 unsigned long long curpbaddr, lastpbaddr;
277
278 curaddr = change_point[i]->addr;
279 lastaddr = change_point[i - 1]->addr;
280 curpbaddr = change_point[i]->pbios->addr;
281 lastpbaddr = change_point[i - 1]->pbios->addr;
282
283 /*
284 * swap entries, when:
285 *
286 * curaddr > lastaddr or
287 * curaddr == lastaddr and curaddr == curpbaddr and
288 * lastaddr != lastpbaddr
289 */
290 if (curaddr < lastaddr ||
291 (curaddr == lastaddr && curaddr == curpbaddr &&
292 lastaddr != lastpbaddr)) {
293 change_tmp = change_point[i];
294 change_point[i] = change_point[i-1];
295 change_point[i-1] = change_tmp;
296 still_changing = 1;
297 }
298 }
299 }
300
301 /* create a new bios memory map, removing overlaps */
302 overlap_entries = 0; /* number of entries in the overlap table */
303 new_bios_entry = 0; /* index for creating new bios map entries */
304 last_type = 0; /* start with undefined memory type */
305 last_addr = 0; /* start with 0 as last starting address */
306
 307	/* loop through change-points, determining effect on the new bios map */
308 for (chgidx = 0; chgidx < chg_nr; chgidx++) {
309 /* keep track of all overlapping bios entries */
310 if (change_point[chgidx]->addr ==
311 change_point[chgidx]->pbios->addr) {
312 /*
313 * add map entry to overlap list (> 1 entry
314 * implies an overlap)
315 */
316 overlap_list[overlap_entries++] =
317 change_point[chgidx]->pbios;
318 } else {
319 /*
320 * remove entry from list (order independent,
321 * so swap with last)
322 */
323 for (i = 0; i < overlap_entries; i++) {
324 if (overlap_list[i] ==
325 change_point[chgidx]->pbios)
326 overlap_list[i] =
327 overlap_list[overlap_entries-1];
328 }
329 overlap_entries--;
330 }
331 /*
332 * if there are overlapping entries, decide which
333 * "type" to use (larger value takes precedence --
334 * 1=usable, 2,3,4,4+=unusable)
335 */
336 current_type = 0;
337 for (i = 0; i < overlap_entries; i++)
338 if (overlap_list[i]->type > current_type)
339 current_type = overlap_list[i]->type;
340 /*
341 * continue building up new bios map based on this
342 * information
343 */
344 if (current_type != last_type) {
345 if (last_type != 0) {
346 new_bios[new_bios_entry].size =
347 change_point[chgidx]->addr - last_addr;
348 /*
349 * move forward only if the new size
350 * was non-zero
351 */
352 if (new_bios[new_bios_entry].size != 0)
353 /*
354 * no more space left for new
355 * bios entries ?
356 */
357 if (++new_bios_entry >= max_nr_map)
358 break;
359 }
360 if (current_type != 0) {
361 new_bios[new_bios_entry].addr =
362 change_point[chgidx]->addr;
363 new_bios[new_bios_entry].type = current_type;
364 last_addr = change_point[chgidx]->addr;
365 }
366 last_type = current_type;
367 }
368 }
369 /* retain count for new bios entries */
370 new_nr = new_bios_entry;
371
372 /* copy new bios mapping into original location */
373 memcpy(biosmap, new_bios, new_nr * sizeof(struct e820entry));
374 *pnr_map = new_nr;
375
376 return 0;
377}
378
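To make the algorithm concrete, a hypothetical harness (values invented; the higher-numbered RESERVED type wins over RAM in the overlap):

	/* Hypothetical input: RAM 0x0-0xa0000 overlapped by a reserved
	 * chunk at 0x90000-0xb0000. */
	struct e820entry map[E820_X_MAX] = {
		{ .addr = 0x00000000, .size = 0xa0000, .type = E820_RAM      },
		{ .addr = 0x00090000, .size = 0x20000, .type = E820_RESERVED },
	};
	int nr = 2;

	if (sanitize_e820_map(map, ARRAY_SIZE(map), &nr) == 0) {
		/* nr == 2: 0x0-0x90000 usable, 0x90000-0xb0000 reserved */
	}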
379static int __init __append_e820_map(struct e820entry *biosmap, int nr_map)
380{
381 while (nr_map) {
382 u64 start = biosmap->addr;
383 u64 size = biosmap->size;
384 u64 end = start + size;
385 u32 type = biosmap->type;
386
387 /* Overflow in 64 bits? Ignore the memory map. */
388 if (start > end)
389 return -1;
390
391 e820_add_region(start, size, type);
392
393 biosmap++;
394 nr_map--;
395 }
396 return 0;
397}
398
399/*
400 * Copy the BIOS e820 map into a safe place.
401 *
402 * Sanity-check it while we're at it..
403 *
404 * If we're lucky and live on a modern system, the setup code
405 * will have given us a memory map that we can use to properly
406 * set up memory. If we aren't, we'll fake a memory map.
407 */
408static int __init append_e820_map(struct e820entry *biosmap, int nr_map)
409{
410 /* Only one memory region (or negative)? Ignore it */
411 if (nr_map < 2)
412 return -1;
413
414 return __append_e820_map(biosmap, nr_map);
415}
416
417static u64 __init e820_update_range_map(struct e820map *e820x, u64 start,
418 u64 size, unsigned old_type,
419 unsigned new_type)
420{
421 int i;
422 u64 real_updated_size = 0;
423
424 BUG_ON(old_type == new_type);
425
426 if (size > (ULLONG_MAX - start))
427 size = ULLONG_MAX - start;
428
 429	for (i = 0; i < e820x->nr_map; i++) {
430 struct e820entry *ei = &e820x->map[i];
431 u64 final_start, final_end;
432 if (ei->type != old_type)
433 continue;
434 /* totally covered? */
435 if (ei->addr >= start &&
436 (ei->addr + ei->size) <= (start + size)) {
437 ei->type = new_type;
438 real_updated_size += ei->size;
439 continue;
440 }
441 /* partially covered */
442 final_start = max(start, ei->addr);
443 final_end = min(start + size, ei->addr + ei->size);
444 if (final_start >= final_end)
445 continue;
446 e820_add_region(final_start, final_end - final_start,
447 new_type);
448 real_updated_size += final_end - final_start;
449
450 ei->size -= final_end - final_start;
451 if (ei->addr < final_start)
452 continue;
453 ei->addr = final_end;
454 }
455 return real_updated_size;
456}
457
458u64 __init e820_update_range(u64 start, u64 size, unsigned old_type,
459 unsigned new_type)
460{
461 return e820_update_range_map(&e820, start, size, old_type, new_type);
462}
463
464static u64 __init e820_update_range_saved(u64 start, u64 size,
465 unsigned old_type, unsigned new_type)
466{
467 return e820_update_range_map(&e820_saved, start, size, old_type,
468 new_type);
469}
470
471/* make e820 not cover the range */
472u64 __init e820_remove_range(u64 start, u64 size, unsigned old_type,
473 int checktype)
474{
475 int i;
476 u64 real_removed_size = 0;
477
478 if (size > (ULLONG_MAX - start))
479 size = ULLONG_MAX - start;
480
481 for (i = 0; i < e820.nr_map; i++) {
482 struct e820entry *ei = &e820.map[i];
483 u64 final_start, final_end;
484
485 if (checktype && ei->type != old_type)
486 continue;
487 /* totally covered? */
488 if (ei->addr >= start &&
489 (ei->addr + ei->size) <= (start + size)) {
490 real_removed_size += ei->size;
491 memset(ei, 0, sizeof(struct e820entry));
492 continue;
493 }
494 /* partially covered */
495 final_start = max(start, ei->addr);
496 final_end = min(start + size, ei->addr + ei->size);
497 if (final_start >= final_end)
498 continue;
499 real_removed_size += final_end - final_start;
500
501 ei->size -= final_end - final_start;
502 if (ei->addr < final_start)
503 continue;
504 ei->addr = final_end;
505 }
506 return real_removed_size;
507}
508
509void __init update_e820(void)
510{
511 int nr_map;
512
513 nr_map = e820.nr_map;
514 if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr_map))
515 return;
516 e820.nr_map = nr_map;
517 printk(KERN_INFO "modified physical RAM map:\n");
518 e820_print_map("modified");
519}
520static void __init update_e820_saved(void)
521{
522 int nr_map;
523
524 nr_map = e820_saved.nr_map;
525 if (sanitize_e820_map(e820_saved.map, ARRAY_SIZE(e820_saved.map), &nr_map))
526 return;
527 e820_saved.nr_map = nr_map;
528}
529#define MAX_GAP_END 0x100000000ull
530/*
531 * Search for a gap in the e820 memory space from start_addr to end_addr.
532 */
533__init int e820_search_gap(unsigned long *gapstart, unsigned long *gapsize,
534 unsigned long start_addr, unsigned long long end_addr)
535{
536 unsigned long long last;
537 int i = e820.nr_map;
538 int found = 0;
539
540 last = (end_addr && end_addr < MAX_GAP_END) ? end_addr : MAX_GAP_END;
541
542 while (--i >= 0) {
543 unsigned long long start = e820.map[i].addr;
544 unsigned long long end = start + e820.map[i].size;
545
546 if (end < start_addr)
547 continue;
548
549 /*
550 * Since "last" is at most 4GB, we know we'll
551 * fit in 32 bits if this condition is true
552 */
553 if (last > end) {
554 unsigned long gap = last - end;
555
556 if (gap >= *gapsize) {
557 *gapsize = gap;
558 *gapstart = end;
559 found = 1;
560 }
561 }
562 if (start < last)
563 last = start;
564 }
565 return found;
566}
567
568/*
569 * Search for the biggest gap in the low 32 bits of the e820
 570 * memory space. We pass this space to the PCI subsystem, so it can
 571 * assign MMIO resources to hotplug or otherwise unconfigured devices.
 572 * Hopefully the BIOS left enough space for them.
573 */
574__init void e820_setup_gap(void)
575{
576 unsigned long gapstart, gapsize, round;
577 int found;
578
579 gapstart = 0x10000000;
580 gapsize = 0x400000;
581 found = e820_search_gap(&gapstart, &gapsize, 0, MAX_GAP_END);
582
583#ifdef CONFIG_X86_64
584 if (!found) {
585 gapstart = (max_pfn << PAGE_SHIFT) + 1024*1024;
586 printk(KERN_ERR "PCI: Warning: Cannot find a gap in the 32bit "
587 "address range\n"
588 KERN_ERR "PCI: Unassigned devices with 32bit resource "
589 "registers may break!\n");
590 }
591#endif
592
593 /*
594 * See how much we want to round up: start off with
595 * rounding to the next 1MB area.
596 */
597 round = 0x100000;
598 while ((gapsize >> 4) > round)
599 round += round;
600 /* Fun with two's complement */
601 pci_mem_start = (gapstart + round) & -round;
602
603 printk(KERN_INFO
604 "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n",
605 pci_mem_start, gapstart, gapsize);
606}
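A worked example of the rounding (numbers are illustrative):

	/*
	 * Suppose the largest gap found is gapstart = 0xd0000000 and
	 * gapsize = 0x30000000. Then (gapsize >> 4) = 0x03000000, so
	 * round doubles from 0x100000 up to 0x4000000, and
	 * pci_mem_start = (0xd0000000 + 0x4000000) & -0x4000000
	 *               = 0xd4000000,
	 * i.e. the PCI window starts on a 64MB boundary.
	 */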
607
608/**
 609 * Because of the size limitation of struct boot_params, only the first
 610 * 128 E820 memory entries are passed to the kernel via
 611 * boot_params.e820_map; the rest are passed via the SETUP_E820_EXT node
 612 * of the linked list of struct setup_data, which is parsed here.
613 */
614void __init parse_e820_ext(struct setup_data *sdata, unsigned long pa_data)
615{
616 u32 map_len;
617 int entries;
618 struct e820entry *extmap;
619
620 entries = sdata->len / sizeof(struct e820entry);
621 map_len = sdata->len + sizeof(struct setup_data);
622 if (map_len > PAGE_SIZE)
623 sdata = early_ioremap(pa_data, map_len);
624 extmap = (struct e820entry *)(sdata->data);
625 __append_e820_map(extmap, entries);
626 sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
627 if (map_len > PAGE_SIZE)
628 early_iounmap(sdata, map_len);
629 printk(KERN_INFO "extended physical RAM map:\n");
630 e820_print_map("extended");
631}
632
633#if defined(CONFIG_X86_64) || \
634 (defined(CONFIG_X86_32) && defined(CONFIG_HIBERNATION))
635/**
636 * Find the ranges of physical addresses that do not correspond to
637 * e820 RAM areas and mark the corresponding pages as nosave for
638 * hibernation (32 bit) or software suspend and suspend to RAM (64 bit).
639 *
640 * This function requires the e820 map to be sorted and without any
641 * overlapping entries and assumes the first e820 area to be RAM.
642 */
643void __init e820_mark_nosave_regions(unsigned long limit_pfn)
644{
645 int i;
646 unsigned long pfn;
647
648 pfn = PFN_DOWN(e820.map[0].addr + e820.map[0].size);
649 for (i = 1; i < e820.nr_map; i++) {
650 struct e820entry *ei = &e820.map[i];
651
652 if (pfn < PFN_UP(ei->addr))
653 register_nosave_region(pfn, PFN_UP(ei->addr));
654
655 pfn = PFN_DOWN(ei->addr + ei->size);
656 if (ei->type != E820_RAM && ei->type != E820_RESERVED_KERN)
657 register_nosave_region(PFN_UP(ei->addr), pfn);
658
659 if (pfn >= limit_pfn)
660 break;
661 }
662}
663#endif
664
665/*
666 * Early reserved memory areas.
667 */
668#define MAX_EARLY_RES 20
669
670struct early_res {
671 u64 start, end;
672 char name[16];
673 char overlap_ok;
674};
675static struct early_res early_res[MAX_EARLY_RES] __initdata = {
676 { 0, PAGE_SIZE, "BIOS data page" }, /* BIOS data page */
677#if defined(CONFIG_X86_64) && defined(CONFIG_X86_TRAMPOLINE)
678 { TRAMPOLINE_BASE, TRAMPOLINE_BASE + 2 * PAGE_SIZE, "TRAMPOLINE" },
679#endif
680#if defined(CONFIG_X86_32) && defined(CONFIG_SMP)
681 /*
682 * But first pinch a few for the stack/trampoline stuff
683 * FIXME: Don't need the extra page at 4K, but need to fix
684 * trampoline before removing it. (see the GDT stuff)
685 */
686 { PAGE_SIZE, PAGE_SIZE + PAGE_SIZE, "EX TRAMPOLINE" },
687 /*
688 * Has to be in very low memory so we can execute
689 * real-mode AP code.
690 */
691 { TRAMPOLINE_BASE, TRAMPOLINE_BASE + PAGE_SIZE, "TRAMPOLINE" },
692#endif
693 {}
694};
695
696static int __init find_overlapped_early(u64 start, u64 end)
697{
698 int i;
699 struct early_res *r;
700
701 for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
702 r = &early_res[i];
703 if (end > r->start && start < r->end)
704 break;
705 }
706
707 return i;
708}
709
710/*
711 * Drop the i-th range from the early reservation map,
712 * by copying any higher ranges down one over it, and
713 * clearing what had been the last slot.
714 */
715static void __init drop_range(int i)
716{
717 int j;
718
719 for (j = i + 1; j < MAX_EARLY_RES && early_res[j].end; j++)
720 ;
721
722 memmove(&early_res[i], &early_res[i + 1],
723 (j - 1 - i) * sizeof(struct early_res));
724
725 early_res[j - 1].end = 0;
726}
727
728/*
729 * Split any existing ranges that:
730 * 1) are marked 'overlap_ok', and
731 * 2) overlap with the stated range [start, end)
732 * into whatever portion (if any) of the existing range is entirely
733 * below or entirely above the stated range. Drop the portion
734 * of the existing range that overlaps with the stated range,
735 * which will allow the caller of this routine to then add that
736 * stated range without conflicting with any existing range.
737 */
738static void __init drop_overlaps_that_are_ok(u64 start, u64 end)
739{
740 int i;
741 struct early_res *r;
742 u64 lower_start, lower_end;
743 u64 upper_start, upper_end;
744 char name[16];
745
746 for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
747 r = &early_res[i];
748
749 /* Continue past non-overlapping ranges */
750 if (end <= r->start || start >= r->end)
751 continue;
752
753 /*
754 * Leave non-ok overlaps as is; let caller
755 * panic "Overlapping early reservations"
756 * when it hits this overlap.
757 */
758 if (!r->overlap_ok)
759 return;
760
761 /*
762 * We have an ok overlap. We will drop it from the early
763 * reservation map, and add back in any non-overlapping
764 * portions (lower or upper) as separate, overlap_ok,
765 * non-overlapping ranges.
766 */
767
768 /* 1. Note any non-overlapping (lower or upper) ranges. */
769 strncpy(name, r->name, sizeof(name) - 1);
770
771 lower_start = lower_end = 0;
772 upper_start = upper_end = 0;
773 if (r->start < start) {
774 lower_start = r->start;
775 lower_end = start;
776 }
777 if (r->end > end) {
778 upper_start = end;
779 upper_end = r->end;
780 }
781
782 /* 2. Drop the original ok overlapping range */
783 drop_range(i);
784
785 i--; /* resume for-loop on copied down entry */
786
787 /* 3. Add back in any non-overlapping ranges. */
788 if (lower_end)
789 reserve_early_overlap_ok(lower_start, lower_end, name);
790 if (upper_end)
791 reserve_early_overlap_ok(upper_start, upper_end, name);
792 }
793}
794
795static void __init __reserve_early(u64 start, u64 end, char *name,
796 int overlap_ok)
797{
798 int i;
799 struct early_res *r;
800
801 i = find_overlapped_early(start, end);
802 if (i >= MAX_EARLY_RES)
803 panic("Too many early reservations");
804 r = &early_res[i];
805 if (r->end)
806 panic("Overlapping early reservations "
807 "%llx-%llx %s to %llx-%llx %s\n",
808 start, end - 1, name?name:"", r->start,
809 r->end - 1, r->name);
810 r->start = start;
811 r->end = end;
812 r->overlap_ok = overlap_ok;
813 if (name)
814 strncpy(r->name, name, sizeof(r->name) - 1);
815}
816
817/*
 818 * A few early reservations come here.
819 *
820 * The 'overlap_ok' in the name of this routine does -not- mean it
821 * is ok for these reservations to overlap an earlier reservation.
822 * Rather it means that it is ok for subsequent reservations to
823 * overlap this one.
824 *
825 * Use this entry point to reserve early ranges when you are doing
826 * so out of "Paranoia", reserving perhaps more memory than you need,
827 * just in case, and don't mind a subsequent overlapping reservation
828 * that is known to be needed.
829 *
830 * The drop_overlaps_that_are_ok() call here isn't really needed.
831 * It would be needed if we had two colliding 'overlap_ok'
832 * reservations, so that the second such would not panic on the
833 * overlap with the first. We don't have any such as of this
834 * writing, but might as well tolerate such if it happens in
835 * the future.
836 */
837void __init reserve_early_overlap_ok(u64 start, u64 end, char *name)
838{
839 drop_overlaps_that_are_ok(start, end);
840 __reserve_early(start, end, name, 1);
841}
842
843/*
844 * Most early reservations come here.
845 *
846 * We first have drop_overlaps_that_are_ok() drop any pre-existing
847 * 'overlap_ok' ranges, so that we can then reserve this memory
848 * range without risk of panic'ing on an overlapping overlap_ok
849 * early reservation.
850 */
851void __init reserve_early(u64 start, u64 end, char *name)
852{
853 drop_overlaps_that_are_ok(start, end);
854 __reserve_early(start, end, name, 0);
855}
856
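A typical call sequence (sketch; the initrd reservation follows this pattern, with addresses taken from boot_params):

	reserve_early(ramdisk_image, ramdisk_end, "RAMDISK");
	/* ... later, once the image has been copied or consumed: */
	free_early(ramdisk_image, ramdisk_end);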
857void __init free_early(u64 start, u64 end)
858{
859 struct early_res *r;
860 int i;
861
862 i = find_overlapped_early(start, end);
863 r = &early_res[i];
864 if (i >= MAX_EARLY_RES || r->end != end || r->start != start)
865 panic("free_early on not reserved area: %llx-%llx!",
866 start, end - 1);
867
868 drop_range(i);
869}
870
871void __init early_res_to_bootmem(u64 start, u64 end)
872{
873 int i, count;
874 u64 final_start, final_end;
875
876 count = 0;
877 for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++)
878 count++;
879
880 printk(KERN_INFO "(%d early reservations) ==> bootmem\n", count);
881 for (i = 0; i < count; i++) {
882 struct early_res *r = &early_res[i];
883 printk(KERN_INFO " #%d [%010llx - %010llx] %16s", i,
884 r->start, r->end, r->name);
885 final_start = max(start, r->start);
886 final_end = min(end, r->end);
887 if (final_start >= final_end) {
888 printk(KERN_CONT "\n");
889 continue;
890 }
891 printk(KERN_CONT " ==> [%010llx - %010llx]\n",
892 final_start, final_end);
893 reserve_bootmem_generic(final_start, final_end - final_start,
894 BOOTMEM_DEFAULT);
895 }
896}
897
898/* Check for already reserved areas */
899static inline int __init bad_addr(u64 *addrp, u64 size, u64 align)
900{
901 int i;
902 u64 addr = *addrp;
903 int changed = 0;
904 struct early_res *r;
905again:
906 i = find_overlapped_early(addr, addr + size);
907 r = &early_res[i];
908 if (i < MAX_EARLY_RES && r->end) {
909 *addrp = addr = round_up(r->end, align);
910 changed = 1;
911 goto again;
912 }
913 return changed;
914}
915
916/* Check for already reserved areas */
917static inline int __init bad_addr_size(u64 *addrp, u64 *sizep, u64 align)
918{
919 int i;
920 u64 addr = *addrp, last;
921 u64 size = *sizep;
922 int changed = 0;
923again:
924 last = addr + size;
925 for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
926 struct early_res *r = &early_res[i];
927 if (last > r->start && addr < r->start) {
928 size = r->start - addr;
929 changed = 1;
930 goto again;
931 }
932 if (last > r->end && addr < r->end) {
933 addr = round_up(r->end, align);
934 size = last - addr;
935 changed = 1;
936 goto again;
937 }
938 if (last <= r->end && addr >= r->start) {
939 (*sizep)++;
940 return 0;
941 }
942 }
943 if (changed) {
944 *addrp = addr;
945 *sizep = size;
946 }
947 return changed;
948}
949
950/*
951 * Find a free area with specified alignment in a specific range.
952 */
953u64 __init find_e820_area(u64 start, u64 end, u64 size, u64 align)
954{
955 int i;
956
957 for (i = 0; i < e820.nr_map; i++) {
958 struct e820entry *ei = &e820.map[i];
959 u64 addr, last;
960 u64 ei_last;
961
962 if (ei->type != E820_RAM)
963 continue;
964 addr = round_up(ei->addr, align);
965 ei_last = ei->addr + ei->size;
966 if (addr < start)
967 addr = round_up(start, align);
968 if (addr >= ei_last)
969 continue;
970 while (bad_addr(&addr, size, align) && addr+size <= ei_last)
971 ;
972 last = addr + size;
973 if (last > ei_last)
974 continue;
975 if (last > end)
976 continue;
977 return addr;
978 }
979 return -1ULL;
980}
981
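A usage sketch (hypothetical caller; the size and range are made up):

	/* Hypothetical: find 64KB of free RAM below 1MB, page aligned,
	 * then pin it with an early reservation. */
	u64 addr = find_e820_area(0, 1<<20, 0x10000, PAGE_SIZE);
	if (addr == -1ULL)
		panic("cannot find 64KB of low RAM");
	reserve_early(addr, addr + 0x10000, "LOW BUFFER");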
982/*
983 * Find next free range after *start
984 */
985u64 __init find_e820_area_size(u64 start, u64 *sizep, u64 align)
986{
987 int i;
988
989 for (i = 0; i < e820.nr_map; i++) {
990 struct e820entry *ei = &e820.map[i];
991 u64 addr, last;
992 u64 ei_last;
993
994 if (ei->type != E820_RAM)
995 continue;
996 addr = round_up(ei->addr, align);
997 ei_last = ei->addr + ei->size;
998 if (addr < start)
999 addr = round_up(start, align);
1000 if (addr >= ei_last)
1001 continue;
1002 *sizep = ei_last - addr;
1003 while (bad_addr_size(&addr, sizep, align) &&
1004 addr + *sizep <= ei_last)
1005 ;
1006 last = addr + *sizep;
1007 if (last > ei_last)
1008 continue;
1009 return addr;
1010 }
1011	return -1ULL;
1012
1013}
1014
1015/*
1016 * Pre-allocate a block of RAM and mark it reserved in the e820 map.
1017 */
1018u64 __init early_reserve_e820(u64 startt, u64 sizet, u64 align)
1019{
1020 u64 size = 0;
1021 u64 addr;
1022 u64 start;
1023
1024 start = startt;
1025 while (size < sizet)
1026 start = find_e820_area_size(start, &size, align);
1027
1028 if (size < sizet)
1029 return 0;
1030
1031 addr = round_down(start + size - sizet, align);
1032 e820_update_range(addr, sizet, E820_RAM, E820_RESERVED);
1033 e820_update_range_saved(addr, sizet, E820_RAM, E820_RESERVED);
1034 printk(KERN_INFO "update e820 for early_reserve_e820\n");
1035 update_e820();
1036 update_e820_saved();
1037
1038 return addr;
1039}
1040
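A caller that needs permanently reserved RAM might use it like this (hypothetical sketch):

	/* Hypothetical: reserve one page of RAM above 1MB in both the
	 * working and the saved e820 map. */
	u64 mem = early_reserve_e820(0x100000, PAGE_SIZE, PAGE_SIZE);
	if (!mem)
		panic("no free RAM for early table");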
1041#ifdef CONFIG_X86_32
1042# ifdef CONFIG_X86_PAE
1043# define MAX_ARCH_PFN (1ULL<<(36-PAGE_SHIFT))
1044# else
1045# define MAX_ARCH_PFN (1ULL<<(32-PAGE_SHIFT))
1046# endif
1047#else /* CONFIG_X86_32 */
1048# define MAX_ARCH_PFN MAXMEM>>PAGE_SHIFT
1049#endif
1050
1051/*
1052 * Last pfn which the user wants to use.
1053 */
1054unsigned long __initdata end_user_pfn = MAX_ARCH_PFN;
1055
1056/*
1057 * Find the highest page frame number we have available
1058 */
1059static unsigned long __init e820_end_pfn(unsigned long limit_pfn, unsigned type)
1060{
1061 int i;
1062 unsigned long last_pfn = 0;
1063 unsigned long max_arch_pfn = MAX_ARCH_PFN;
1064
1065 for (i = 0; i < e820.nr_map; i++) {
1066 struct e820entry *ei = &e820.map[i];
1067 unsigned long start_pfn;
1068 unsigned long end_pfn;
1069
1070 if (ei->type != type)
1071 continue;
1072
1073 start_pfn = ei->addr >> PAGE_SHIFT;
1074 end_pfn = (ei->addr + ei->size) >> PAGE_SHIFT;
1075
1076 if (start_pfn >= limit_pfn)
1077 continue;
1078 if (end_pfn > limit_pfn) {
1079 last_pfn = limit_pfn;
1080 break;
1081 }
1082 if (end_pfn > last_pfn)
1083 last_pfn = end_pfn;
1084 }
1085
1086 if (last_pfn > max_arch_pfn)
1087 last_pfn = max_arch_pfn;
1088 if (last_pfn > end_user_pfn)
1089 last_pfn = end_user_pfn;
1090
1091 printk(KERN_INFO "last_pfn = %#lx max_arch_pfn = %#lx\n",
1092 last_pfn, max_arch_pfn);
1093 return last_pfn;
1094}
1095unsigned long __init e820_end_of_ram_pfn(void)
1096{
1097 return e820_end_pfn(MAX_ARCH_PFN, E820_RAM);
1098}
1099
1100unsigned long __init e820_end_of_low_ram_pfn(void)
1101{
1102 return e820_end_pfn(1UL<<(32 - PAGE_SHIFT), E820_RAM);
1103}
1104/*
1105 * Finds an active region in the address range from start_pfn to last_pfn and
1106 * returns its range in ei_startpfn and ei_endpfn for the e820 entry.
1107 */
1108int __init e820_find_active_region(const struct e820entry *ei,
1109 unsigned long start_pfn,
1110 unsigned long last_pfn,
1111 unsigned long *ei_startpfn,
1112 unsigned long *ei_endpfn)
1113{
1114 u64 align = PAGE_SIZE;
1115
1116 *ei_startpfn = round_up(ei->addr, align) >> PAGE_SHIFT;
1117 *ei_endpfn = round_down(ei->addr + ei->size, align) >> PAGE_SHIFT;
1118
1119 /* Skip map entries smaller than a page */
1120 if (*ei_startpfn >= *ei_endpfn)
1121 return 0;
1122
1123 /* Skip if map is outside the node */
1124 if (ei->type != E820_RAM || *ei_endpfn <= start_pfn ||
1125 *ei_startpfn >= last_pfn)
1126 return 0;
1127
1128 /* Check for overlaps */
1129 if (*ei_startpfn < start_pfn)
1130 *ei_startpfn = start_pfn;
1131 if (*ei_endpfn > last_pfn)
1132 *ei_endpfn = last_pfn;
1133
1134 /* Obey end_user_pfn to save on memmap */
1135 if (*ei_startpfn >= end_user_pfn)
1136 return 0;
1137 if (*ei_endpfn > end_user_pfn)
1138 *ei_endpfn = end_user_pfn;
1139
1140 return 1;
1141}
1142
1143/* Walk the e820 map and register active regions within a node */
1144void __init e820_register_active_regions(int nid, unsigned long start_pfn,
1145 unsigned long last_pfn)
1146{
1147 unsigned long ei_startpfn;
1148 unsigned long ei_endpfn;
1149 int i;
1150
1151 for (i = 0; i < e820.nr_map; i++)
1152 if (e820_find_active_region(&e820.map[i],
1153 start_pfn, last_pfn,
1154 &ei_startpfn, &ei_endpfn))
1155 add_active_range(nid, ei_startpfn, ei_endpfn);
1156}
1157
1158/*
1159 * Find the hole size (in bytes) in the memory range.
1160 * @start: starting address of the memory range to scan
1161 * @end: ending address of the memory range to scan
1162 */
1163u64 __init e820_hole_size(u64 start, u64 end)
1164{
1165 unsigned long start_pfn = start >> PAGE_SHIFT;
1166 unsigned long last_pfn = end >> PAGE_SHIFT;
1167 unsigned long ei_startpfn, ei_endpfn, ram = 0;
1168 int i;
1169
1170 for (i = 0; i < e820.nr_map; i++) {
1171 if (e820_find_active_region(&e820.map[i],
1172 start_pfn, last_pfn,
1173 &ei_startpfn, &ei_endpfn))
1174 ram += ei_endpfn - ei_startpfn;
1175 }
1176 return end - start - ((u64)ram << PAGE_SHIFT);
1177}
1178
1179static void early_panic(char *msg)
1180{
1181 early_printk(msg);
1182 panic(msg);
1183}
1184
1185static int userdef __initdata;
1186
1187/* "mem=size" limits usable RAM; "mem=nopentium" disables the 4MB page tables. */
1188static int __init parse_memopt(char *p)
1189{
1190 u64 mem_size;
1191
1192 if (!p)
1193 return -EINVAL;
1194
1195#ifdef CONFIG_X86_32
1196 if (!strcmp(p, "nopentium")) {
1197 setup_clear_cpu_cap(X86_FEATURE_PSE);
1198 return 0;
1199 }
1200#endif
1201
1202 userdef = 1;
1203 mem_size = memparse(p, &p);
1204 end_user_pfn = mem_size>>PAGE_SHIFT;
1205 e820_remove_range(mem_size, ULLONG_MAX - mem_size, E820_RAM, 1);
1206
1207 return 0;
1208}
1209early_param("mem", parse_memopt);
1210
1211static int __init parse_memmap_opt(char *p)
1212{
1213 char *oldp;
1214 u64 start_at, mem_size;
1215
1216 if (!p)
1217 return -EINVAL;
1218
1219 if (!strcmp(p, "exactmap")) {
1220#ifdef CONFIG_CRASH_DUMP
1221 /*
1222 * If we are doing a crash dump, we still need to know
1223 * the real mem size before original memory map is
1224 * reset.
1225 */
1226 saved_max_pfn = e820_end_of_ram_pfn();
1227#endif
1228 e820.nr_map = 0;
1229 userdef = 1;
1230 return 0;
1231 }
1232
1233 oldp = p;
1234 mem_size = memparse(p, &p);
1235 if (p == oldp)
1236 return -EINVAL;
1237
1238 userdef = 1;
1239 if (*p == '@') {
1240 start_at = memparse(p+1, &p);
1241 e820_add_region(start_at, mem_size, E820_RAM);
1242 } else if (*p == '#') {
1243 start_at = memparse(p+1, &p);
1244 e820_add_region(start_at, mem_size, E820_ACPI);
1245 } else if (*p == '$') {
1246 start_at = memparse(p+1, &p);
1247 e820_add_region(start_at, mem_size, E820_RESERVED);
1248 } else {
1249 end_user_pfn = (mem_size >> PAGE_SHIFT);
1250 e820_remove_range(mem_size, ULLONG_MAX - mem_size, E820_RAM, 1);
1251 }
1252 return *p == '\0' ? 0 : -EINVAL;
1253}
1254early_param("memmap", parse_memmap_opt);
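For reference, some example uses of these options on the kernel command line (sizes and addresses are illustrative):

	mem=512M                          trim all RAM above 512MB
	memmap=exactmap memmap=640K@0 memmap=511M@1M
	                                  discard the firmware map, build one by hand
	memmap=64M#0x10000000             mark 64MB at 256MB as ACPI data
	memmap=16M$0x20000000             mark 16MB at 512MB as reserved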
1255
1256void __init finish_e820_parsing(void)
1257{
1258 if (userdef) {
1259 int nr = e820.nr_map;
1260
1261 if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr) < 0)
1262 early_panic("Invalid user supplied memory map");
1263 e820.nr_map = nr;
1264
1265 printk(KERN_INFO "user-defined physical RAM map:\n");
1266 e820_print_map("user");
1267 }
1268}
1269
1270static inline const char *e820_type_to_string(int e820_type)
1271{
1272 switch (e820_type) {
1273 case E820_RESERVED_KERN:
1274 case E820_RAM: return "System RAM";
1275 case E820_ACPI: return "ACPI Tables";
1276 case E820_NVS: return "ACPI Non-volatile Storage";
1277 default: return "reserved";
1278 }
1279}
1280
1281/*
1282 * Mark e820 reserved areas as busy for the resource manager.
1283 */
1284void __init e820_reserve_resources(void)
1285{
1286 int i;
1287 struct resource *res;
1288 u64 end;
1289
1290 res = alloc_bootmem_low(sizeof(struct resource) * e820.nr_map);
1291 for (i = 0; i < e820.nr_map; i++) {
1292 end = e820.map[i].addr + e820.map[i].size - 1;
1293#ifndef CONFIG_RESOURCES_64BIT
1294 if (end > 0x100000000ULL) {
1295 res++;
1296 continue;
1297 }
1298#endif
1299 res->name = e820_type_to_string(e820.map[i].type);
1300 res->start = e820.map[i].addr;
1301 res->end = end;
1302
1303 res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
1304 insert_resource(&iomem_resource, res);
1305 res++;
1306 }
1307
1308 for (i = 0; i < e820_saved.nr_map; i++) {
1309 struct e820entry *entry = &e820_saved.map[i];
1310 firmware_map_add_early(entry->addr,
1311 entry->addr + entry->size - 1,
1312 e820_type_to_string(entry->type));
1313 }
1314}
1315
1316/*
1317 * Non-standard memory setup can be specified via this quirk:
1318 */
1319char * (*arch_memory_setup_quirk)(void);
1320
1321char *__init default_machine_specific_memory_setup(void)
1322{
1323 char *who = "BIOS-e820";
1324 int new_nr;
1325 /*
1326 * Try to copy the BIOS-supplied E820-map.
1327 *
1328 * Otherwise fake a memory map; one section from 0k->640k,
1329 * the next section from 1mb->appropriate_mem_k
1330 */
1331 new_nr = boot_params.e820_entries;
1332 sanitize_e820_map(boot_params.e820_map,
1333 ARRAY_SIZE(boot_params.e820_map),
1334 &new_nr);
1335 boot_params.e820_entries = new_nr;
1336 if (append_e820_map(boot_params.e820_map, boot_params.e820_entries)
1337 < 0) {
1338 u64 mem_size;
1339
1340 /* compare results from other methods and take the greater */
1341 if (boot_params.alt_mem_k
1342 < boot_params.screen_info.ext_mem_k) {
1343 mem_size = boot_params.screen_info.ext_mem_k;
1344 who = "BIOS-88";
1345 } else {
1346 mem_size = boot_params.alt_mem_k;
1347 who = "BIOS-e801";
1348 }
1349
1350 e820.nr_map = 0;
1351 e820_add_region(0, LOWMEMSIZE(), E820_RAM);
1352 e820_add_region(HIGH_MEMORY, mem_size << 10, E820_RAM);
1353 }
1354
1355 /* In case someone cares... */
1356 return who;
1357}
1358
1359char *__init __attribute__((weak)) machine_specific_memory_setup(void)
1360{
1361 if (arch_memory_setup_quirk) {
1362 char *who = arch_memory_setup_quirk();
1363
1364 if (who)
1365 return who;
1366 }
1367 return default_machine_specific_memory_setup();
1368}
1369
1370/* Overridden in paravirt.c if CONFIG_PARAVIRT */
1371char * __init __attribute__((weak)) memory_setup(void)
1372{
1373 return machine_specific_memory_setup();
1374}
1375
1376void __init setup_memory_map(void)
1377{
1378 char *who;
1379
1380 who = memory_setup();
1381 memcpy(&e820_saved, &e820, sizeof(struct e820map));
1382 printk(KERN_INFO "BIOS-provided physical RAM map:\n");
1383 e820_print_map(who);
1384}
1385
1386#ifdef CONFIG_X86_64
1387int __init arch_get_ram_range(int slot, u64 *addr, u64 *size)
1388{
1389 int i;
1390
1391 if (slot < 0 || slot >= e820.nr_map)
1392 return -1;
1393 for (i = slot; i < e820.nr_map; i++) {
1394 if (e820.map[i].type != E820_RAM)
1395 continue;
1396 break;
1397 }
1398 if (i == e820.nr_map || e820.map[i].addr > (max_pfn << PAGE_SHIFT))
1399 return -1;
1400 *addr = e820.map[i].addr;
1401 *size = min_t(u64, e820.map[i].size + e820.map[i].addr,
1402 max_pfn << PAGE_SHIFT) - *addr;
1403 return i + 1;
1404}
1405#endif
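The return value doubles as the next slot to query, so a caller can walk all RAM ranges like this (sketch; process_ram() stands in for a hypothetical callback):

	u64 addr, size;
	int slot = 0;

	while ((slot = arch_get_ram_range(slot, &addr, &size)) >= 0)
		process_ram(addr, size);	/* RAM below max_pfn */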
diff --git a/arch/x86/kernel/e820_32.c b/arch/x86/kernel/e820_32.c
deleted file mode 100644
index ed733e7cf4e6..000000000000
--- a/arch/x86/kernel/e820_32.c
+++ /dev/null
@@ -1,775 +0,0 @@
1#include <linux/kernel.h>
2#include <linux/types.h>
3#include <linux/init.h>
4#include <linux/bootmem.h>
5#include <linux/ioport.h>
6#include <linux/string.h>
7#include <linux/kexec.h>
8#include <linux/module.h>
9#include <linux/mm.h>
10#include <linux/pfn.h>
11#include <linux/uaccess.h>
12#include <linux/suspend.h>
13
14#include <asm/pgtable.h>
15#include <asm/page.h>
16#include <asm/e820.h>
17#include <asm/setup.h>
18
19struct e820map e820;
20struct change_member {
21 struct e820entry *pbios; /* pointer to original bios entry */
22 unsigned long long addr; /* address for this change point */
23};
24static struct change_member change_point_list[2*E820MAX] __initdata;
25static struct change_member *change_point[2*E820MAX] __initdata;
26static struct e820entry *overlap_list[E820MAX] __initdata;
27static struct e820entry new_bios[E820MAX] __initdata;
28/* For PCI or other memory-mapped resources */
29unsigned long pci_mem_start = 0x10000000;
30#ifdef CONFIG_PCI
31EXPORT_SYMBOL(pci_mem_start);
32#endif
33extern int user_defined_memmap;
34
35static struct resource system_rom_resource = {
36 .name = "System ROM",
37 .start = 0xf0000,
38 .end = 0xfffff,
39 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
40};
41
42static struct resource extension_rom_resource = {
43 .name = "Extension ROM",
44 .start = 0xe0000,
45 .end = 0xeffff,
46 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
47};
48
49static struct resource adapter_rom_resources[] = { {
50 .name = "Adapter ROM",
51 .start = 0xc8000,
52 .end = 0,
53 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
54}, {
55 .name = "Adapter ROM",
56 .start = 0,
57 .end = 0,
58 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
59}, {
60 .name = "Adapter ROM",
61 .start = 0,
62 .end = 0,
63 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
64}, {
65 .name = "Adapter ROM",
66 .start = 0,
67 .end = 0,
68 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
69}, {
70 .name = "Adapter ROM",
71 .start = 0,
72 .end = 0,
73 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
74}, {
75 .name = "Adapter ROM",
76 .start = 0,
77 .end = 0,
78 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
79} };
80
81static struct resource video_rom_resource = {
82 .name = "Video ROM",
83 .start = 0xc0000,
84 .end = 0xc7fff,
85 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
86};
87
88#define ROMSIGNATURE 0xaa55
89
90static int __init romsignature(const unsigned char *rom)
91{
92 const unsigned short * const ptr = (const unsigned short *)rom;
93 unsigned short sig;
94
95 return probe_kernel_address(ptr, sig) == 0 && sig == ROMSIGNATURE;
96}
97
98static int __init romchecksum(const unsigned char *rom, unsigned long length)
99{
100 unsigned char sum, c;
101
102 for (sum = 0; length && probe_kernel_address(rom++, c) == 0; length--)
103 sum += c;
104 return !length && !sum;
105}
106
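For reference, the convention these two helpers check (a note on the format, not on any particular device):

	/* An option ROM is considered valid when it starts with the
	 * 0xaa55 signature and the byte-wise sum of the whole image is
	 * 0 mod 256; the vendor pads the image with a fixup byte so
	 * the sum wraps to zero. */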
107static void __init probe_roms(void)
108{
109 const unsigned char *rom;
110 unsigned long start, length, upper;
111 unsigned char c;
112 int i;
113
114 /* video rom */
115 upper = adapter_rom_resources[0].start;
116 for (start = video_rom_resource.start; start < upper; start += 2048) {
117 rom = isa_bus_to_virt(start);
118 if (!romsignature(rom))
119 continue;
120
121 video_rom_resource.start = start;
122
123 if (probe_kernel_address(rom + 2, c) != 0)
124 continue;
125
126 /* 0 < length <= 0x7f * 512, historically */
127 length = c * 512;
128
129 /* if checksum okay, trust length byte */
130 if (length && romchecksum(rom, length))
131 video_rom_resource.end = start + length - 1;
132
133 request_resource(&iomem_resource, &video_rom_resource);
134 break;
135 }
136
137 start = (video_rom_resource.end + 1 + 2047) & ~2047UL;
138 if (start < upper)
139 start = upper;
140
141 /* system rom */
142 request_resource(&iomem_resource, &system_rom_resource);
143 upper = system_rom_resource.start;
144
145 /* check for extension rom (ignore length byte!) */
146 rom = isa_bus_to_virt(extension_rom_resource.start);
147 if (romsignature(rom)) {
148 length = extension_rom_resource.end - extension_rom_resource.start + 1;
149 if (romchecksum(rom, length)) {
150 request_resource(&iomem_resource, &extension_rom_resource);
151 upper = extension_rom_resource.start;
152 }
153 }
154
155 /* check for adapter roms on 2k boundaries */
156 for (i = 0; i < ARRAY_SIZE(adapter_rom_resources) && start < upper; start += 2048) {
157 rom = isa_bus_to_virt(start);
158 if (!romsignature(rom))
159 continue;
160
161 if (probe_kernel_address(rom + 2, c) != 0)
162 continue;
163
164 /* 0 < length <= 0x7f * 512, historically */
165 length = c * 512;
166
167 /* but accept any length that fits if checksum okay */
168 if (!length || start + length > upper || !romchecksum(rom, length))
169 continue;
170
171 adapter_rom_resources[i].start = start;
172 adapter_rom_resources[i].end = start + length - 1;
173 request_resource(&iomem_resource, &adapter_rom_resources[i]);
174
175 start = adapter_rom_resources[i++].end & ~2047UL;
176 }
177}
178
179/*
180 * Request address space for all standard RAM and ROM resources
181 * and also for regions reported as reserved by the e820.
182 */
183void __init init_iomem_resources(struct resource *code_resource,
184 struct resource *data_resource,
185 struct resource *bss_resource)
186{
187 int i;
188
189 probe_roms();
190 for (i = 0; i < e820.nr_map; i++) {
191 struct resource *res;
192#ifndef CONFIG_RESOURCES_64BIT
193 if (e820.map[i].addr + e820.map[i].size > 0x100000000ULL)
194 continue;
195#endif
196 res = kzalloc(sizeof(struct resource), GFP_ATOMIC);
197 switch (e820.map[i].type) {
198 case E820_RAM: res->name = "System RAM"; break;
199 case E820_ACPI: res->name = "ACPI Tables"; break;
200 case E820_NVS: res->name = "ACPI Non-volatile Storage"; break;
201 default: res->name = "reserved";
202 }
203 res->start = e820.map[i].addr;
204 res->end = res->start + e820.map[i].size - 1;
205 res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
206 if (request_resource(&iomem_resource, res)) {
207 kfree(res);
208 continue;
209 }
210 if (e820.map[i].type == E820_RAM) {
211 /*
212 * We don't know which RAM region contains kernel data,
213 * so we try it repeatedly and let the resource manager
214 * test it.
215 */
216 request_resource(res, code_resource);
217 request_resource(res, data_resource);
218 request_resource(res, bss_resource);
219#ifdef CONFIG_KEXEC
220 if (crashk_res.start != crashk_res.end)
221 request_resource(res, &crashk_res);
222#endif
223 }
224 }
225}
226
227#if defined(CONFIG_PM) && defined(CONFIG_HIBERNATION)
228/**
229 * e820_mark_nosave_regions - Find the ranges of physical addresses that do not
230 * correspond to e820 RAM areas and mark the corresponding pages as nosave for
231 * hibernation.
232 *
233 * This function requires the e820 map to be sorted and without any
234 * overlapping entries and assumes the first e820 area to be RAM.
235 */
236void __init e820_mark_nosave_regions(void)
237{
238 int i;
239 unsigned long pfn;
240
241 pfn = PFN_DOWN(e820.map[0].addr + e820.map[0].size);
242 for (i = 1; i < e820.nr_map; i++) {
243 struct e820entry *ei = &e820.map[i];
244
245 if (pfn < PFN_UP(ei->addr))
246 register_nosave_region(pfn, PFN_UP(ei->addr));
247
248 pfn = PFN_DOWN(ei->addr + ei->size);
249 if (ei->type != E820_RAM)
250 register_nosave_region(PFN_UP(ei->addr), pfn);
251
252 if (pfn >= max_low_pfn)
253 break;
254 }
255}
256#endif
257
258void __init add_memory_region(unsigned long long start,
259 unsigned long long size, int type)
260{
261 int x;
262
263 x = e820.nr_map;
264
265 if (x == E820MAX) {
266 printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
267 return;
268 }
269
270 e820.map[x].addr = start;
271 e820.map[x].size = size;
272 e820.map[x].type = type;
273 e820.nr_map++;
274} /* add_memory_region */
275
276/*
277 * Sanitize the BIOS e820 map.
278 *
279 * Some e820 responses include overlapping entries. The following
280 * replaces the original e820 map with a new one, removing overlaps.
281 *
282 */
283int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
284{
285 struct change_member *change_tmp;
286 unsigned long current_type, last_type;
287 unsigned long long last_addr;
288 int chgidx, still_changing;
289 int overlap_entries;
290 int new_bios_entry;
291 int old_nr, new_nr, chg_nr;
292 int i;
293
294 /*
295 Visually we're performing the following (1,2,3,4 = memory types)...
296
297 Sample memory map (w/overlaps):
298 ____22__________________
299 ______________________4_
300 ____1111________________
301 _44_____________________
302 11111111________________
303 ____________________33__
304 ___________44___________
305 __________33333_________
306 ______________22________
307 ___________________2222_
308 _________111111111______
309 _____________________11_
310 _________________4______
311
312 Sanitized equivalent (no overlap):
313 1_______________________
314 _44_____________________
315 ___1____________________
316 ____22__________________
317 ______11________________
318 _________1______________
319 __________3_____________
320 ___________44___________
321 _____________33_________
322 _______________2________
323 ________________1_______
324 _________________4______
325 ___________________2____
326 ____________________33__
327 ______________________4_
328 */
329 /* if there's only one memory region, don't bother */
330 if (*pnr_map < 2) {
331 return -1;
332 }
333
334 old_nr = *pnr_map;
335
336 /* bail out if we find any unreasonable addresses in bios map */
337 for (i=0; i<old_nr; i++)
338 if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr) {
339 return -1;
340 }
341
342 /* create pointers for initial change-point information (for sorting) */
343 for (i=0; i < 2*old_nr; i++)
344 change_point[i] = &change_point_list[i];
345
346 /* record all known change-points (starting and ending addresses),
347 omitting those that are for empty memory regions */
348 chgidx = 0;
349 for (i=0; i < old_nr; i++) {
350 if (biosmap[i].size != 0) {
351 change_point[chgidx]->addr = biosmap[i].addr;
352 change_point[chgidx++]->pbios = &biosmap[i];
353 change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size;
354 change_point[chgidx++]->pbios = &biosmap[i];
355 }
356 }
357 chg_nr = chgidx; /* true number of change-points */
358
359 /* sort change-point list by memory addresses (low -> high) */
360 still_changing = 1;
361 while (still_changing) {
362 still_changing = 0;
363 for (i=1; i < chg_nr; i++) {
364 /* if <current_addr> > <last_addr>, swap */
365 /* or, if current=<start_addr> & last=<end_addr>, swap */
366 if ((change_point[i]->addr < change_point[i-1]->addr) ||
367 ((change_point[i]->addr == change_point[i-1]->addr) &&
368 (change_point[i]->addr == change_point[i]->pbios->addr) &&
369 (change_point[i-1]->addr != change_point[i-1]->pbios->addr))
370 )
371 {
372 change_tmp = change_point[i];
373 change_point[i] = change_point[i-1];
374 change_point[i-1] = change_tmp;
375 still_changing=1;
376 }
377 }
378 }
379
380 /* create a new bios memory map, removing overlaps */
381 overlap_entries=0; /* number of entries in the overlap table */
382 new_bios_entry=0; /* index for creating new bios map entries */
383 last_type = 0; /* start with undefined memory type */
384 last_addr = 0; /* start with 0 as last starting address */
 385	/* loop through change-points, determining effect on the new bios map */
386 for (chgidx=0; chgidx < chg_nr; chgidx++)
387 {
388 /* keep track of all overlapping bios entries */
389 if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr)
390 {
391 /* add map entry to overlap list (> 1 entry implies an overlap) */
392 overlap_list[overlap_entries++]=change_point[chgidx]->pbios;
393 }
394 else
395 {
396 /* remove entry from list (order independent, so swap with last) */
397 for (i=0; i<overlap_entries; i++)
398 {
399 if (overlap_list[i] == change_point[chgidx]->pbios)
400 overlap_list[i] = overlap_list[overlap_entries-1];
401 }
402 overlap_entries--;
403 }
404 /* if there are overlapping entries, decide which "type" to use */
405 /* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */
406 current_type = 0;
407 for (i=0; i<overlap_entries; i++)
408 if (overlap_list[i]->type > current_type)
409 current_type = overlap_list[i]->type;
410 /* continue building up new bios map based on this information */
411 if (current_type != last_type) {
412 if (last_type != 0) {
413 new_bios[new_bios_entry].size =
414 change_point[chgidx]->addr - last_addr;
415 /* move forward only if the new size was non-zero */
416 if (new_bios[new_bios_entry].size != 0)
417 if (++new_bios_entry >= E820MAX)
418 break; /* no more space left for new bios entries */
419 }
420 if (current_type != 0) {
421 new_bios[new_bios_entry].addr = change_point[chgidx]->addr;
422 new_bios[new_bios_entry].type = current_type;
423 last_addr=change_point[chgidx]->addr;
424 }
425 last_type = current_type;
426 }
427 }
428 new_nr = new_bios_entry; /* retain count for new bios entries */
429
430 /* copy new bios mapping into original location */
431 memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry));
432 *pnr_map = new_nr;
433
434 return 0;
435}
436
437/*
438 * Copy the BIOS e820 map into a safe place.
439 *
440 * Sanity-check it while we're at it..
441 *
442 * If we're lucky and live on a modern system, the setup code
443 * will have given us a memory map that we can use to properly
444 * set up memory. If we aren't, we'll fake a memory map.
445 *
446 * We check to see that the memory map contains at least 2 elements
447 * before we'll use it, because the detection code in setup.S may
448 * not be perfect and most every PC known to man has two memory
449 * regions: one from 0 to 640k, and one from 1mb up. (The IBM
450 * thinkpad 560x, for example, does not cooperate with the memory
451 * detection code.)
452 */
453int __init copy_e820_map(struct e820entry *biosmap, int nr_map)
454{
455 /* Only one memory region (or negative)? Ignore it */
456 if (nr_map < 2)
457 return -1;
458
459 do {
460 u64 start = biosmap->addr;
461 u64 size = biosmap->size;
462 u64 end = start + size;
463 u32 type = biosmap->type;
464
465 /* Overflow in 64 bits? Ignore the memory map. */
466 if (start > end)
467 return -1;
468
469 add_memory_region(start, size, type);
470 } while (biosmap++, --nr_map);
471
472 return 0;
473}
474
475/*
476 * Find the highest page frame number we have available
477 */
478void __init propagate_e820_map(void)
479{
480 int i;
481
482 max_pfn = 0;
483
484 for (i = 0; i < e820.nr_map; i++) {
485 unsigned long start, end;
486 /* RAM? */
487 if (e820.map[i].type != E820_RAM)
488 continue;
489 start = PFN_UP(e820.map[i].addr);
490 end = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
491 if (start >= end)
492 continue;
493 if (end > max_pfn)
494 max_pfn = end;
495 memory_present(0, start, end);
496 }
497}
498
499/*
500 * Register fully available low RAM pages with the bootmem allocator.
501 */
502void __init register_bootmem_low_pages(unsigned long max_low_pfn)
503{
504 int i;
505
506 for (i = 0; i < e820.nr_map; i++) {
507 unsigned long curr_pfn, last_pfn, size;
508 /*
509 * Reserve usable low memory
510 */
511 if (e820.map[i].type != E820_RAM)
512 continue;
513 /*
514 * We are rounding up the start address of usable memory:
515 */
516 curr_pfn = PFN_UP(e820.map[i].addr);
517 if (curr_pfn >= max_low_pfn)
518 continue;
519 /*
520 * ... and at the end of the usable range downwards:
521 */
522 last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
523
524 if (last_pfn > max_low_pfn)
525 last_pfn = max_low_pfn;
526
527 /*
528 * .. finally, did all the rounding and playing
529 * around just make the area go away?
530 */
531 if (last_pfn <= curr_pfn)
532 continue;
533
534 size = last_pfn - curr_pfn;
535 free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(size));
536 }
537}
538
539void __init e820_register_memory(void)
540{
541 unsigned long gapstart, gapsize, round;
542 unsigned long long last;
543 int i;
544
545 /*
546 * Search for the biggest gap in the low 32 bits of the e820
547 * memory space.
548 */
549 last = 0x100000000ull;
550 gapstart = 0x10000000;
551 gapsize = 0x400000;
552 i = e820.nr_map;
553 while (--i >= 0) {
554 unsigned long long start = e820.map[i].addr;
555 unsigned long long end = start + e820.map[i].size;
556
557 /*
558 * Since "last" is at most 4GB, we know we'll
559 * fit in 32 bits if this condition is true
560 */
561 if (last > end) {
562 unsigned long gap = last - end;
563
564 if (gap > gapsize) {
565 gapsize = gap;
566 gapstart = end;
567 }
568 }
569 if (start < last)
570 last = start;
571 }
572
573 /*
574 * See how much we want to round up: start off with
575 * rounding to the next 1MB area.
576 */
577 round = 0x100000;
578 while ((gapsize >> 4) > round)
579 round += round;
580 /* Fun with two's complement */
581 pci_mem_start = (gapstart + round) & -round;
582
583 printk("Allocating PCI resources starting at %08lx (gap: %08lx:%08lx)\n",
584 pci_mem_start, gapstart, gapsize);
585}
586
587void __init print_memory_map(char *who)
588{
589 int i;
590
591 for (i = 0; i < e820.nr_map; i++) {
592 printk(" %s: %016Lx - %016Lx ", who,
593 e820.map[i].addr,
594 e820.map[i].addr + e820.map[i].size);
595 switch (e820.map[i].type) {
596 case E820_RAM: printk("(usable)\n");
597 break;
598 case E820_RESERVED:
599 printk("(reserved)\n");
600 break;
601 case E820_ACPI:
602 printk("(ACPI data)\n");
603 break;
604 case E820_NVS:
605 printk("(ACPI NVS)\n");
606 break;
607 default: printk("type %u\n", e820.map[i].type);
608 break;
609 }
610 }
611}
612
613void __init limit_regions(unsigned long long size)
614{
615 unsigned long long current_addr;
616 int i;
617
618 print_memory_map("limit_regions start");
619 for (i = 0; i < e820.nr_map; i++) {
620 current_addr = e820.map[i].addr + e820.map[i].size;
621 if (current_addr < size)
622 continue;
623
624 if (e820.map[i].type != E820_RAM)
625 continue;
626
627 if (e820.map[i].addr >= size) {
628 /*
629 * This region starts past the end of the
630 * requested size, skip it completely.
631 */
632 e820.nr_map = i;
633 } else {
634 e820.nr_map = i + 1;
635 e820.map[i].size -= current_addr - size;
636 }
637 print_memory_map("limit_regions endfor");
638 return;
639 }
640 print_memory_map("limit_regions endfunc");
641}
642
643/*
644 * This function checks if any part of the range <start,end> is mapped
645 * with type.
646 */
647int
648e820_any_mapped(u64 start, u64 end, unsigned type)
649{
650 int i;
651 for (i = 0; i < e820.nr_map; i++) {
652 const struct e820entry *ei = &e820.map[i];
653 if (type && ei->type != type)
654 continue;
655 if (ei->addr >= end || ei->addr + ei->size <= start)
656 continue;
657 return 1;
658 }
659 return 0;
660}
661EXPORT_SYMBOL_GPL(e820_any_mapped);
662
663 /*
664 * This function checks if the entire range <start,end> is mapped with type.
665 *
 666 * Note: this function only works correctly if the e820 table is sorted
 667 * and non-overlapping, which is the case.
668 */
669int __init
670e820_all_mapped(unsigned long s, unsigned long e, unsigned type)
671{
672 u64 start = s;
673 u64 end = e;
674 int i;
675 for (i = 0; i < e820.nr_map; i++) {
676 struct e820entry *ei = &e820.map[i];
677 if (type && ei->type != type)
678 continue;
 679		/* does this entry overlap (part of) the range <start,end>? */
680 if (ei->addr >= end || ei->addr + ei->size <= start)
681 continue;
682 /* if the region is at the beginning of <start,end> we move
683 * start to the end of the region since it's ok until there
684 */
685 if (ei->addr <= start)
686 start = ei->addr + ei->size;
687 /* if start is now at or beyond end, we're done, full
688 * coverage */
689 if (start >= end)
690 return 1; /* we're done */
691 }
692 return 0;
693}
694
695static int __init parse_memmap(char *arg)
696{
697 if (!arg)
698 return -EINVAL;
699
700 if (strcmp(arg, "exactmap") == 0) {
701#ifdef CONFIG_CRASH_DUMP
702 /* If we are doing a crash dump, we
703 * still need to know the real mem
704 * size before original memory map is
705 * reset.
706 */
707 propagate_e820_map();
708 saved_max_pfn = max_pfn;
709#endif
710 e820.nr_map = 0;
711 user_defined_memmap = 1;
712 } else {
713 /* If the user specifies a memory size, we
714 * limit the BIOS-provided memory map to
715 * that size: exactmap can be used to specify
716 * an exact map, while mem=number trims the
717 * existing memory map.
718 */
719 unsigned long long start_at, mem_size;
720
721 mem_size = memparse(arg, &arg);
722 if (*arg == '@') {
723 start_at = memparse(arg+1, &arg);
724 add_memory_region(start_at, mem_size, E820_RAM);
725 } else if (*arg == '#') {
726 start_at = memparse(arg+1, &arg);
727 add_memory_region(start_at, mem_size, E820_ACPI);
728 } else if (*arg == '$') {
729 start_at = memparse(arg+1, &arg);
730 add_memory_region(start_at, mem_size, E820_RESERVED);
731 } else {
732 limit_regions(mem_size);
733 user_defined_memmap = 1;
734 }
735 }
736 return 0;
737}
738early_param("memmap", parse_memmap);
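
The argument split above can be sketched in userspace; parse_size() below only mimics memparse()'s number-plus-K/M/G-suffix handling and is not the kernel routine, and the sample string is illustrative:

#include <stdio.h>
#include <stdlib.h>

static unsigned long long parse_size(const char *s, char **end)
{
	unsigned long long v = strtoull(s, end, 0);

	switch (**end) {
	case 'G': v <<= 10;	/* fall through */
	case 'M': v <<= 10;	/* fall through */
	case 'K': v <<= 10; (*end)++; break;
	}
	return v;
}

int main(void)
{
	char *p, *arg = "64M@16M";	/* as in memmap=64M@16M => RAM */
	unsigned long long size, start;

	size = parse_size(arg, &p);
	if (*p == '@') {		/* '#' => ACPI, '$' => reserved */
		start = parse_size(p + 1, &p);
		printf("add region %#llx-%#llx as RAM\n",
		       start, start + size);
	}
	return 0;
}
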
739void __init update_memory_range(u64 start, u64 size, unsigned old_type,
740 unsigned new_type)
741{
742 int i;
743
744 BUG_ON(old_type == new_type);
745
746 for (i = 0; i < e820.nr_map; i++) {
747 struct e820entry *ei = &e820.map[i];
748 u64 final_start, final_end;
749 if (ei->type != old_type)
750 continue;
751 /* totally covered? */
752 if (ei->addr >= start && ei->addr + ei->size <= start + size) {
753 ei->type = new_type;
754 continue;
755 }
756 /* partially covered */
757 final_start = max(start, ei->addr);
758 final_end = min(start + size, ei->addr + ei->size);
759 if (final_start >= final_end)
760 continue;
761 add_memory_region(final_start, final_end - final_start,
762 new_type);
763 }
764}
765void __init update_e820(void)
766{
767 u8 nr_map;
768
769 nr_map = e820.nr_map;
770 if (sanitize_e820_map(e820.map, &nr_map))
771 return;
772 e820.nr_map = nr_map;
773 printk(KERN_INFO "modified physical RAM map:\n");
774 print_memory_map("modified");
775}
diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c
deleted file mode 100644
index 124480c0008d..000000000000
--- a/arch/x86/kernel/e820_64.c
+++ /dev/null
@@ -1,952 +0,0 @@
1/*
2 * Handle the memory map.
3 * The functions here do the job until bootmem takes over.
4 *
5 * Getting sanitize_e820_map() in sync with i386 version by applying change:
6 * - Provisions for empty E820 memory regions (reported by certain BIOSes).
7 * Alex Achenbach <xela@slit.de>, December 2002.
8 * Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
9 *
10 */
11#include <linux/kernel.h>
12#include <linux/types.h>
13#include <linux/init.h>
14#include <linux/bootmem.h>
15#include <linux/ioport.h>
16#include <linux/string.h>
17#include <linux/kexec.h>
18#include <linux/module.h>
19#include <linux/mm.h>
20#include <linux/suspend.h>
21#include <linux/pfn.h>
22
23#include <asm/pgtable.h>
24#include <asm/page.h>
25#include <asm/e820.h>
26#include <asm/proto.h>
27#include <asm/setup.h>
28#include <asm/sections.h>
29#include <asm/kdebug.h>
30#include <asm/trampoline.h>
31
32struct e820map e820;
33
34/*
35 * PFN of last memory page.
36 */
37unsigned long end_pfn;
38
39/*
40 * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries.
41 * The direct mapping extends to max_pfn_mapped, so that we can directly access
42 * apertures, ACPI and other tables without having to play with fixmaps.
43 */
44unsigned long max_pfn_mapped;
45
46/*
47 * Last pfn which the user wants to use.
48 */
49static unsigned long __initdata end_user_pfn = MAXMEM>>PAGE_SHIFT;
50
51/*
52 * Early reserved memory areas.
53 */
54#define MAX_EARLY_RES 20
55
56struct early_res {
57 unsigned long start, end;
58 char name[16];
59};
60static struct early_res early_res[MAX_EARLY_RES] __initdata = {
61 { 0, PAGE_SIZE, "BIOS data page" }, /* BIOS data page */
62#ifdef CONFIG_X86_TRAMPOLINE
63 { TRAMPOLINE_BASE, TRAMPOLINE_BASE + 2 * PAGE_SIZE, "TRAMPOLINE" },
64#endif
65 {}
66};
67
68void __init reserve_early(unsigned long start, unsigned long end, char *name)
69{
70 int i;
71 struct early_res *r;
72 for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
73 r = &early_res[i];
74 if (end > r->start && start < r->end)
75 panic("Overlapping early reservations %lx-%lx %s to %lx-%lx %s\n",
76 start, end - 1, name?name:"", r->start, r->end - 1, r->name);
77 }
78 if (i >= MAX_EARLY_RES)
79 panic("Too many early reservations");
80 r = &early_res[i];
81 r->start = start;
82 r->end = end;
83 if (name)
84 strncpy(r->name, name, sizeof(r->name) - 1);
85}
86
87void __init free_early(unsigned long start, unsigned long end)
88{
89 struct early_res *r;
90 int i, j;
91
92 for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
93 r = &early_res[i];
94 if (start == r->start && end == r->end)
95 break;
96 }
97 if (i >= MAX_EARLY_RES || !early_res[i].end)
98 panic("free_early on a non-reserved area: %lx-%lx!", start, end);
99
100 for (j = i + 1; j < MAX_EARLY_RES && early_res[j].end; j++)
101 ;
102
103 memmove(&early_res[i], &early_res[i + 1],
104 (j - 1 - i) * sizeof(struct early_res));
105
106 early_res[j - 1].end = 0;
107}
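
The memmove above closes the hole left by the freed slot by sliding the tail of the array down one place and clearing the last entry; a standalone sketch of that compaction with illustrative integer slots:

#include <stdio.h>
#include <string.h>

int main(void)
{
	int res[5] = { 10, 20, 30, 40, 0 };	/* 0 terminates, like ->end */
	int i = 1, j;				/* drop slot 1 (the 20) */

	for (j = i + 1; j < 5 && res[j]; j++)
		;
	memmove(&res[i], &res[i + 1], (j - 1 - i) * sizeof(res[0]));
	res[j - 1] = 0;

	for (i = 0; res[i]; i++)
		printf("%d\n", res[i]);		/* prints 10, 30, 40 */
	return 0;
}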
108
109void __init early_res_to_bootmem(unsigned long start, unsigned long end)
110{
111 int i;
112 unsigned long final_start, final_end;
113 for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
114 struct early_res *r = &early_res[i];
115 final_start = max(start, r->start);
116 final_end = min(end, r->end);
117 if (final_start >= final_end)
118 continue;
119 printk(KERN_INFO " early res: %d [%lx-%lx] %s\n", i,
120 final_start, final_end - 1, r->name);
121 reserve_bootmem_generic(final_start, final_end - final_start);
122 }
123}
124
125/* Check for already reserved areas */
126static inline int __init
127bad_addr(unsigned long *addrp, unsigned long size, unsigned long align)
128{
129 int i;
130 unsigned long addr = *addrp, last;
131 int changed = 0;
132again:
133 last = addr + size;
134 for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
135 struct early_res *r = &early_res[i];
136 if (last >= r->start && addr < r->end) {
137 *addrp = addr = round_up(r->end, align);
138 changed = 1;
139 goto again;
140 }
141 }
142 return changed;
143}
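
For reference, the round_up()/round_down() helpers used here reduce to the usual power-of-two mask trick; a sketch of the conventional definitions, assuming the alignment is a power of two:

#define round_up(x, y)   (((x) + (y) - 1) & ~((y) - 1))
#define round_down(x, y) ((x) & ~((y) - 1))

/* e.g. round_up(0x1234, 0x1000) == 0x2000,
 *      round_down(0x1234, 0x1000) == 0x1000 */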
144
145/* Check for already reserved areas */
146static inline int __init
147bad_addr_size(unsigned long *addrp, unsigned long *sizep, unsigned long align)
148{
149 int i;
150 unsigned long addr = *addrp, last;
151 unsigned long size = *sizep;
152 int changed = 0;
153again:
154 last = addr + size;
155 for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
156 struct early_res *r = &early_res[i];
157 if (last > r->start && addr < r->start) {
158 size = r->start - addr;
159 changed = 1;
160 goto again;
161 }
162 if (last > r->end && addr < r->end) {
163 addr = round_up(r->end, align);
164 size = last - addr;
165 changed = 1;
166 goto again;
167 }
168 if (last <= r->end && addr >= r->start) {
169 (*sizep)++;
170 return 0;
171 }
172 }
173 if (changed) {
174 *addrp = addr;
175 *sizep = size;
176 }
177 return changed;
178}
179/*
180 * This function checks if any part of the range <start,end> is mapped
181 * with type.
182 */
183int
184e820_any_mapped(unsigned long start, unsigned long end, unsigned type)
185{
186 int i;
187
188 for (i = 0; i < e820.nr_map; i++) {
189 struct e820entry *ei = &e820.map[i];
190
191 if (type && ei->type != type)
192 continue;
193 if (ei->addr >= end || ei->addr + ei->size <= start)
194 continue;
195 return 1;
196 }
197 return 0;
198}
199EXPORT_SYMBOL_GPL(e820_any_mapped);
200
201/*
202 * This function checks if the entire range <start,end> is mapped with type.
203 *
204 * Note: this function only works correctly if the e820 table is sorted
205 * and non-overlapping, which is the case.
206 */
207int __init e820_all_mapped(unsigned long start, unsigned long end,
208 unsigned type)
209{
210 int i;
211
212 for (i = 0; i < e820.nr_map; i++) {
213 struct e820entry *ei = &e820.map[i];
214
215 if (type && ei->type != type)
216 continue;
217 /* does this region overlap, at least partly, with <start,end>? */
218 if (ei->addr >= end || ei->addr + ei->size <= start)
219 continue;
220
221 /* if the region starts at the beginning of <start,end>, move
222 * start to the end of the region: the range is covered up to there
223 */
224 if (ei->addr <= start)
225 start = ei->addr + ei->size;
226 /*
227 * if start is now at or beyond end, we're done, full
228 * coverage
229 */
230 if (start >= end)
231 return 1;
232 }
233 return 0;
234}
235
236/*
237 * Find a free area with specified alignment in a specific range.
238 */
239unsigned long __init find_e820_area(unsigned long start, unsigned long end,
240 unsigned long size, unsigned long align)
241{
242 int i;
243
244 for (i = 0; i < e820.nr_map; i++) {
245 struct e820entry *ei = &e820.map[i];
246 unsigned long addr, last;
247 unsigned long ei_last;
248
249 if (ei->type != E820_RAM)
250 continue;
251 addr = round_up(ei->addr, align);
252 ei_last = ei->addr + ei->size;
253 if (addr < start)
254 addr = round_up(start, align);
255 if (addr >= ei_last)
256 continue;
257 while (bad_addr(&addr, size, align) && addr+size <= ei_last)
258 ;
259 last = addr + size;
260 if (last > ei_last)
261 continue;
262 if (last > end)
263 continue;
264 return addr;
265 }
266 return -1UL;
267}
268
269/*
270 * Find next free range after *start
271 */
272unsigned long __init find_e820_area_size(unsigned long start,
273 unsigned long *sizep,
274 unsigned long align)
275{
276 int i;
277
278 for (i = 0; i < e820.nr_map; i++) {
279 struct e820entry *ei = &e820.map[i];
280 unsigned long addr, last;
281 unsigned long ei_last;
282
283 if (ei->type != E820_RAM)
284 continue;
285 addr = round_up(ei->addr, align);
286 ei_last = ei->addr + ei->size;
287 if (addr < start)
288 addr = round_up(start, align);
289 if (addr >= ei_last)
290 continue;
291 *sizep = ei_last - addr;
292 while (bad_addr_size(&addr, sizep, align) &&
293 addr + *sizep <= ei_last)
294 ;
295 last = addr + *sizep;
296 if (last > ei_last)
297 continue;
298 return addr;
299 }
300 return -1UL;
301
302}
303/*
304 * Find the highest page frame number we have available
305 */
306unsigned long __init e820_end_of_ram(void)
307{
308 unsigned long end_pfn;
309
310 end_pfn = find_max_pfn_with_active_regions();
311
312 if (end_pfn > max_pfn_mapped)
313 max_pfn_mapped = end_pfn;
314 if (max_pfn_mapped > MAXMEM>>PAGE_SHIFT)
315 max_pfn_mapped = MAXMEM>>PAGE_SHIFT;
316 if (end_pfn > end_user_pfn)
317 end_pfn = end_user_pfn;
318 if (end_pfn > max_pfn_mapped)
319 end_pfn = max_pfn_mapped;
320
321 printk(KERN_INFO "max_pfn_mapped = %lu\n", max_pfn_mapped);
322 return end_pfn;
323}
324
325/*
326 * Mark e820 reserved areas as busy for the resource manager.
327 */
328void __init e820_reserve_resources(void)
329{
330 int i;
331 struct resource *res;
332
333 res = alloc_bootmem_low(sizeof(struct resource) * e820.nr_map);
334 for (i = 0; i < e820.nr_map; i++) {
335 switch (e820.map[i].type) {
336 case E820_RAM: res->name = "System RAM"; break;
337 case E820_ACPI: res->name = "ACPI Tables"; break;
338 case E820_NVS: res->name = "ACPI Non-volatile Storage"; break;
339 default: res->name = "reserved";
340 }
341 res->start = e820.map[i].addr;
342 res->end = res->start + e820.map[i].size - 1;
343 res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
344 insert_resource(&iomem_resource, res);
345 res++;
346 }
347}
348
349/*
350 * Find the ranges of physical addresses that do not correspond to
351 * e820 RAM areas and mark the corresponding pages as nosave for software
352 * suspend and suspend to RAM.
353 *
354 * This function requires the e820 map to be sorted and without any
355 * overlapping entries and assumes the first e820 area to be RAM.
356 */
357void __init e820_mark_nosave_regions(void)
358{
359 int i;
360 unsigned long paddr;
361
362 paddr = round_down(e820.map[0].addr + e820.map[0].size, PAGE_SIZE);
363 for (i = 1; i < e820.nr_map; i++) {
364 struct e820entry *ei = &e820.map[i];
365
366 if (paddr < ei->addr)
367 register_nosave_region(PFN_DOWN(paddr),
368 PFN_UP(ei->addr));
369
370 paddr = round_down(ei->addr + ei->size, PAGE_SIZE);
371 if (ei->type != E820_RAM)
372 register_nosave_region(PFN_UP(ei->addr),
373 PFN_DOWN(paddr));
374
375 if (paddr >= (end_pfn << PAGE_SHIFT))
376 break;
377 }
378}
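
PFN_UP() and PFN_DOWN() convert byte addresses to page frame numbers, rounding up and down respectively; a sketch of the conventional <linux/pfn.h> definitions:

#define PFN_UP(x)   (((x) + PAGE_SIZE - 1) >> PAGE_SHIFT)
#define PFN_DOWN(x) ((x) >> PAGE_SHIFT)

/* with 4K pages: PFN_UP(0x1001) == 2, PFN_DOWN(0x1001) == 1 */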
379
380/*
381 * Finds an active region in the address range from start_pfn to end_pfn and
382 * returns its range in ei_startpfn and ei_endpfn for the e820 entry.
383 */
384static int __init e820_find_active_region(const struct e820entry *ei,
385 unsigned long start_pfn,
386 unsigned long end_pfn,
387 unsigned long *ei_startpfn,
388 unsigned long *ei_endpfn)
389{
390 *ei_startpfn = round_up(ei->addr, PAGE_SIZE) >> PAGE_SHIFT;
391 *ei_endpfn = round_down(ei->addr + ei->size, PAGE_SIZE) >> PAGE_SHIFT;
392
393 /* Skip map entries smaller than a page */
394 if (*ei_startpfn >= *ei_endpfn)
395 return 0;
396
397 /* Check if max_pfn_mapped should be updated */
398 if (ei->type != E820_RAM && *ei_endpfn > max_pfn_mapped)
399 max_pfn_mapped = *ei_endpfn;
400
401 /* Skip if map is outside the node */
402 if (ei->type != E820_RAM || *ei_endpfn <= start_pfn ||
403 *ei_startpfn >= end_pfn)
404 return 0;
405
406 /* Check for overlaps */
407 if (*ei_startpfn < start_pfn)
408 *ei_startpfn = start_pfn;
409 if (*ei_endpfn > end_pfn)
410 *ei_endpfn = end_pfn;
411
412 /* Obey end_user_pfn to save on memmap */
413 if (*ei_startpfn >= end_user_pfn)
414 return 0;
415 if (*ei_endpfn > end_user_pfn)
416 *ei_endpfn = end_user_pfn;
417
418 return 1;
419}
420
421/* Walk the e820 map and register active regions within a node */
422void __init
423e820_register_active_regions(int nid, unsigned long start_pfn,
424 unsigned long end_pfn)
425{
426 unsigned long ei_startpfn;
427 unsigned long ei_endpfn;
428 int i;
429
430 for (i = 0; i < e820.nr_map; i++)
431 if (e820_find_active_region(&e820.map[i],
432 start_pfn, end_pfn,
433 &ei_startpfn, &ei_endpfn))
434 add_active_range(nid, ei_startpfn, ei_endpfn);
435}
436
437/*
438 * Add a memory region to the kernel e820 map.
439 */
440void __init add_memory_region(unsigned long start, unsigned long size, int type)
441{
442 int x = e820.nr_map;
443
444 if (x == E820MAX) {
445 printk(KERN_ERR "Oops! Too many entries in the memory map!\n");
446 return;
447 }
448
449 e820.map[x].addr = start;
450 e820.map[x].size = size;
451 e820.map[x].type = type;
452 e820.nr_map++;
453}
454
455/*
456 * Find the hole size (in bytes) in the memory range.
457 * @start: starting address of the memory range to scan
458 * @end: ending address of the memory range to scan
459 */
460unsigned long __init e820_hole_size(unsigned long start, unsigned long end)
461{
462 unsigned long start_pfn = start >> PAGE_SHIFT;
463 unsigned long end_pfn = end >> PAGE_SHIFT;
464 unsigned long ei_startpfn, ei_endpfn, ram = 0;
465 int i;
466
467 for (i = 0; i < e820.nr_map; i++) {
468 if (e820_find_active_region(&e820.map[i],
469 start_pfn, end_pfn,
470 &ei_startpfn, &ei_endpfn))
471 ram += ei_endpfn - ei_startpfn;
472 }
473 return end - start - (ram << PAGE_SHIFT);
474}
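
The hole size is plain arithmetic over the active-region scan; a worked example with illustrative numbers:

/* start = 0, end = 16MB, and the scan finds 3072 RAM pages:
 * hole = 16MB - (3072 << PAGE_SHIFT) = 16MB - 12MB = 4MB (4K pages) */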
475
476static void __init e820_print_map(char *who)
477{
478 int i;
479
480 for (i = 0; i < e820.nr_map; i++) {
481 printk(KERN_INFO " %s: %016Lx - %016Lx ", who,
482 (unsigned long long) e820.map[i].addr,
483 (unsigned long long)
484 (e820.map[i].addr + e820.map[i].size));
485 switch (e820.map[i].type) {
486 case E820_RAM:
487 printk(KERN_CONT "(usable)\n");
488 break;
489 case E820_RESERVED:
490 printk(KERN_CONT "(reserved)\n");
491 break;
492 case E820_ACPI:
493 printk(KERN_CONT "(ACPI data)\n");
494 break;
495 case E820_NVS:
496 printk(KERN_CONT "(ACPI NVS)\n");
497 break;
498 default:
499 printk(KERN_CONT "type %u\n", e820.map[i].type);
500 break;
501 }
502 }
503}
504
505/*
506 * Sanitize the BIOS e820 map.
507 *
508 * Some e820 responses include overlapping entries. The following
509 * replaces the original e820 map with a new one, removing overlaps.
510 *
511 */
512static int __init sanitize_e820_map(struct e820entry *biosmap, char *pnr_map)
513{
514 struct change_member {
515 struct e820entry *pbios; /* pointer to original bios entry */
516 unsigned long long addr; /* address for this change point */
517 };
518 static struct change_member change_point_list[2*E820MAX] __initdata;
519 static struct change_member *change_point[2*E820MAX] __initdata;
520 static struct e820entry *overlap_list[E820MAX] __initdata;
521 static struct e820entry new_bios[E820MAX] __initdata;
522 struct change_member *change_tmp;
523 unsigned long current_type, last_type;
524 unsigned long long last_addr;
525 int chgidx, still_changing;
526 int overlap_entries;
527 int new_bios_entry;
528 int old_nr, new_nr, chg_nr;
529 int i;
530
531 /*
532 Visually we're performing the following
533 (1,2,3,4 = memory types)...
534
535 Sample memory map (w/overlaps):
536 ____22__________________
537 ______________________4_
538 ____1111________________
539 _44_____________________
540 11111111________________
541 ____________________33__
542 ___________44___________
543 __________33333_________
544 ______________22________
545 ___________________2222_
546 _________111111111______
547 _____________________11_
548 _________________4______
549
550 Sanitized equivalent (no overlap):
551 1_______________________
552 _44_____________________
553 ___1____________________
554 ____22__________________
555 ______11________________
556 _________1______________
557 __________3_____________
558 ___________44___________
559 _____________33_________
560 _______________2________
561 ________________1_______
562 _________________4______
563 ___________________2____
564 ____________________33__
565 ______________________4_
566 */
567
568 /* if there's only one memory region, don't bother */
569 if (*pnr_map < 2)
570 return -1;
571
572 old_nr = *pnr_map;
573
574 /* bail out if we find any unreasonable addresses in bios map */
575 for (i = 0; i < old_nr; i++)
576 if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr)
577 return -1;
578
579 /* create pointers for initial change-point information (for sorting) */
580 for (i = 0; i < 2 * old_nr; i++)
581 change_point[i] = &change_point_list[i];
582
583 /* record all known change-points (starting and ending addresses),
584 omitting those that are for empty memory regions */
585 chgidx = 0;
586 for (i = 0; i < old_nr; i++) {
587 if (biosmap[i].size != 0) {
588 change_point[chgidx]->addr = biosmap[i].addr;
589 change_point[chgidx++]->pbios = &biosmap[i];
590 change_point[chgidx]->addr = biosmap[i].addr +
591 biosmap[i].size;
592 change_point[chgidx++]->pbios = &biosmap[i];
593 }
594 }
595 chg_nr = chgidx;
596
597 /* sort change-point list by memory addresses (low -> high) */
598 still_changing = 1;
599 while (still_changing) {
600 still_changing = 0;
601 for (i = 1; i < chg_nr; i++) {
602 unsigned long long curaddr, lastaddr;
603 unsigned long long curpbaddr, lastpbaddr;
604
605 curaddr = change_point[i]->addr;
606 lastaddr = change_point[i - 1]->addr;
607 curpbaddr = change_point[i]->pbios->addr;
608 lastpbaddr = change_point[i - 1]->pbios->addr;
609
610 /*
611 * swap entries, when:
612 *
613 * curaddr < lastaddr or
614 * curaddr == lastaddr and curaddr == curpbaddr and
615 * lastaddr != lastpbaddr
616 */
617 if (curaddr < lastaddr ||
618 (curaddr == lastaddr && curaddr == curpbaddr &&
619 lastaddr != lastpbaddr)) {
620 change_tmp = change_point[i];
621 change_point[i] = change_point[i-1];
622 change_point[i-1] = change_tmp;
623 still_changing = 1;
624 }
625 }
626 }
627
628 /* create a new bios memory map, removing overlaps */
629 overlap_entries = 0; /* number of entries in the overlap table */
630 new_bios_entry = 0; /* index for creating new bios map entries */
631 last_type = 0; /* start with undefined memory type */
632 last_addr = 0; /* start with 0 as last starting address */
633
634 /* loop through change-points, determining their effect on the new bios map */
635 for (chgidx = 0; chgidx < chg_nr; chgidx++) {
636 /* keep track of all overlapping bios entries */
637 if (change_point[chgidx]->addr ==
638 change_point[chgidx]->pbios->addr) {
639 /*
640 * add map entry to overlap list (> 1 entry
641 * implies an overlap)
642 */
643 overlap_list[overlap_entries++] =
644 change_point[chgidx]->pbios;
645 } else {
646 /*
647 * remove entry from list (order independent,
648 * so swap with last)
649 */
650 for (i = 0; i < overlap_entries; i++) {
651 if (overlap_list[i] ==
652 change_point[chgidx]->pbios)
653 overlap_list[i] =
654 overlap_list[overlap_entries-1];
655 }
656 overlap_entries--;
657 }
658 /*
659 * if there are overlapping entries, decide which
660 * "type" to use (larger value takes precedence --
661 * 1=usable, 2,3,4,4+=unusable)
662 */
663 current_type = 0;
664 for (i = 0; i < overlap_entries; i++)
665 if (overlap_list[i]->type > current_type)
666 current_type = overlap_list[i]->type;
667 /*
668 * continue building up new bios map based on this
669 * information
670 */
671 if (current_type != last_type) {
672 if (last_type != 0) {
673 new_bios[new_bios_entry].size =
674 change_point[chgidx]->addr - last_addr;
675 /*
676 * move forward only if the new size
677 * was non-zero
678 */
679 if (new_bios[new_bios_entry].size != 0)
680 /*
681 * no more space left for new
682 * bios entries ?
683 */
684 if (++new_bios_entry >= E820MAX)
685 break;
686 }
687 if (current_type != 0) {
688 new_bios[new_bios_entry].addr =
689 change_point[chgidx]->addr;
690 new_bios[new_bios_entry].type = current_type;
691 last_addr = change_point[chgidx]->addr;
692 }
693 last_type = current_type;
694 }
695 }
696 /* retain count for new bios entries */
697 new_nr = new_bios_entry;
698
699 /* copy new bios mapping into original location */
700 memcpy(biosmap, new_bios, new_nr * sizeof(struct e820entry));
701 *pnr_map = new_nr;
702
703 return 0;
704}
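
A toy, self-contained sketch of the change-point idea, assuming two hard-coded overlapping entries and the larger-type-wins rule; it illustrates the approach only and is not the kernel algorithm (adjacent same-type segments are not merged here):

#include <stdio.h>

struct ent { unsigned long long addr, size; unsigned type; };

int main(void)
{
	struct ent map[2] = {
		{ 0x000000, 0x400000, 1 },	/* RAM, overlaps the next */
		{ 0x200000, 0x400000, 2 },	/* reserved */
	};
	/* change points: each entry contributes its start and its end */
	unsigned long long pts[4] = { map[0].addr, map[0].addr + map[0].size,
				      map[1].addr, map[1].addr + map[1].size };
	int i, j, s;

	/* bubble sort the four points, as the original does */
	for (i = 0; i < 4; i++)
		for (j = 0; j < 3 - i; j++)
			if (pts[j] > pts[j + 1]) {
				unsigned long long t = pts[j];
				pts[j] = pts[j + 1];
				pts[j + 1] = t;
			}
	/* between consecutive points, the winning type is the maximum */
	for (s = 0; s < 3; s++) {
		unsigned type = 0;

		for (i = 0; i < 2; i++)
			if (pts[s] >= map[i].addr &&
			    pts[s + 1] <= map[i].addr + map[i].size &&
			    map[i].type > type)
				type = map[i].type;
		if (type && pts[s] != pts[s + 1])
			printf("%#llx - %#llx type %u\n",
			       pts[s], pts[s + 1], type);
	}
	return 0;
}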
705
706/*
707 * Copy the BIOS e820 map into a safe place.
708 *
709 * Sanity-check it while we're at it.
710 *
711 * If we're lucky and live on a modern system, the setup code
712 * will have given us a memory map that we can use to properly
713 * set up memory. If we aren't, we'll fake a memory map.
714 */
715static int __init copy_e820_map(struct e820entry *biosmap, int nr_map)
716{
717 /* Only one memory region (or negative)? Ignore it */
718 if (nr_map < 2)
719 return -1;
720
721 do {
722 u64 start = biosmap->addr;
723 u64 size = biosmap->size;
724 u64 end = start + size;
725 u32 type = biosmap->type;
726
727 /* Overflow in 64 bits? Ignore the memory map. */
728 if (start > end)
729 return -1;
730
731 add_memory_region(start, size, type);
732 } while (biosmap++, --nr_map);
733 return 0;
734}
735
736static void early_panic(char *msg)
737{
738 early_printk(msg);
739 panic(msg);
740}
741
742/* Non-void return type only for x86 32-bit compat */
743char * __init machine_specific_memory_setup(void)
744{
745 char *who = "BIOS-e820";
746 /*
747 * Try to copy the BIOS-supplied E820-map.
748 *
749 * Unlike the 32-bit code there is no faked fallback map here;
750 * if no valid map is found, we panic early instead.
751 */
752 sanitize_e820_map(boot_params.e820_map, &boot_params.e820_entries);
753 if (copy_e820_map(boot_params.e820_map, boot_params.e820_entries) < 0)
754 early_panic("Cannot find a valid memory map");
755 printk(KERN_INFO "BIOS-provided physical RAM map:\n");
756 e820_print_map(who);
757
758 /* In case someone cares... */
759 return who;
760}
761
762static int __init parse_memopt(char *p)
763{
764 if (!p)
765 return -EINVAL;
766 end_user_pfn = memparse(p, &p);
767 end_user_pfn >>= PAGE_SHIFT;
768 return 0;
769}
770early_param("mem", parse_memopt);
771
772static int userdef __initdata;
773
774static int __init parse_memmap_opt(char *p)
775{
776 char *oldp;
777 unsigned long long start_at, mem_size;
778
779 if (!strcmp(p, "exactmap")) {
780#ifdef CONFIG_CRASH_DUMP
781 /*
782 * If we are doing a crash dump, we still need to know
783 * the real mem size before the original memory map is
784 * reset.
785 */
786 e820_register_active_regions(0, 0, -1UL);
787 saved_max_pfn = e820_end_of_ram();
788 remove_all_active_ranges();
789#endif
790 max_pfn_mapped = 0;
791 e820.nr_map = 0;
792 userdef = 1;
793 return 0;
794 }
795
796 oldp = p;
797 mem_size = memparse(p, &p);
798 if (p == oldp)
799 return -EINVAL;
800
801 userdef = 1;
802 if (*p == '@') {
803 start_at = memparse(p+1, &p);
804 add_memory_region(start_at, mem_size, E820_RAM);
805 } else if (*p == '#') {
806 start_at = memparse(p+1, &p);
807 add_memory_region(start_at, mem_size, E820_ACPI);
808 } else if (*p == '$') {
809 start_at = memparse(p+1, &p);
810 add_memory_region(start_at, mem_size, E820_RESERVED);
811 } else {
812 end_user_pfn = (mem_size >> PAGE_SHIFT);
813 }
814 return *p == '\0' ? 0 : -EINVAL;
815}
816early_param("memmap", parse_memmap_opt);
817
818void __init finish_e820_parsing(void)
819{
820 if (userdef) {
821 char nr = e820.nr_map;
822
823 if (sanitize_e820_map(e820.map, &nr) < 0)
824 early_panic("Invalid user supplied memory map");
825 e820.nr_map = nr;
826
827 printk(KERN_INFO "user-defined physical RAM map:\n");
828 e820_print_map("user");
829 }
830}
831
832void __init update_memory_range(u64 start, u64 size, unsigned old_type,
833 unsigned new_type)
834{
835 int i;
836
837 BUG_ON(old_type == new_type);
838
839 for (i = 0; i < e820.nr_map; i++) {
840 struct e820entry *ei = &e820.map[i];
841 u64 final_start, final_end;
842 if (ei->type != old_type)
843 continue;
844 /* totally covered? */
845 if (ei->addr >= start && ei->addr + ei->size <= start + size) {
846 ei->type = new_type;
847 continue;
848 }
849 /* partially covered */
850 final_start = max(start, ei->addr);
851 final_end = min(start + size, ei->addr + ei->size);
852 if (final_start >= final_end)
853 continue;
854 add_memory_region(final_start, final_end - final_start,
855 new_type);
856 }
857}
858
859void __init update_e820(void)
860{
861 u8 nr_map;
862
863 nr_map = e820.nr_map;
864 if (sanitize_e820_map(e820.map, &nr_map))
865 return;
866 e820.nr_map = nr_map;
867 printk(KERN_INFO "modified physical RAM map:\n");
868 e820_print_map("modified");
869}
870
871unsigned long pci_mem_start = 0xaeedbabe;
872EXPORT_SYMBOL(pci_mem_start);
873
874/*
875 * Search for the biggest gap in the low 32 bits of the e820
876 * memory space. We pass this space to PCI, which assigns MMIO
877 * resources in it for hotplug or unconfigured devices.
878 * Hopefully the BIOS left enough space.
879 */
880__init void e820_setup_gap(void)
881{
882 unsigned long gapstart, gapsize, round;
883 unsigned long last;
884 int i;
885 int found = 0;
886
887 last = 0x100000000ull;
888 gapstart = 0x10000000;
889 gapsize = 0x400000;
890 i = e820.nr_map;
891 while (--i >= 0) {
892 unsigned long long start = e820.map[i].addr;
893 unsigned long long end = start + e820.map[i].size;
894
895 /*
896 * Since "last" is at most 4GB, we know we'll
897 * fit in 32 bits if this condition is true
898 */
899 if (last > end) {
900 unsigned long gap = last - end;
901
902 if (gap > gapsize) {
903 gapsize = gap;
904 gapstart = end;
905 found = 1;
906 }
907 }
908 if (start < last)
909 last = start;
910 }
911
912 if (!found) {
913 gapstart = (end_pfn << PAGE_SHIFT) + 1024*1024;
914 printk(KERN_ERR "PCI: Warning: Cannot find a gap in the 32bit "
915 "address range\n"
916 KERN_ERR "PCI: Unassigned devices with 32bit resource "
917 "registers may break!\n");
918 }
919
920 /*
921 * See how much we want to round up: start off with
922 * rounding to the next 1MB area.
923 */
924 round = 0x100000;
925 while ((gapsize >> 4) > round)
926 round += round;
927 /* Fun with two's complement */
928 pci_mem_start = (gapstart + round) & -round;
929
930 printk(KERN_INFO
931 "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n",
932 pci_mem_start, gapstart, gapsize);
933}
934
935int __init arch_get_ram_range(int slot, u64 *addr, u64 *size)
936{
937 int i;
938
939 if (slot < 0 || slot >= e820.nr_map)
940 return -1;
941 for (i = slot; i < e820.nr_map; i++) {
942 if (e820.map[i].type != E820_RAM)
943 continue;
944 break;
945 }
946 if (i == e820.nr_map || e820.map[i].addr > (max_pfn << PAGE_SHIFT))
947 return -1;
948 *addr = e820.map[i].addr;
949 *size = min_t(u64, e820.map[i].size + e820.map[i].addr,
950 max_pfn << PAGE_SHIFT) - *addr;
951 return i + 1;
952}
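
A hedged usage sketch of the slot-walking convention above (the return value is the next slot to ask for, or -1 when the walk is done); the caller name is illustrative:

static void __init walk_ram_ranges(void)
{
	u64 addr, size;
	int slot = 0;

	/* each successful call fills one RAM range below max_pfn */
	while ((slot = arch_get_ram_range(slot, &addr, &size)) >= 0)
		printk(KERN_DEBUG "RAM: %#llx + %#llx\n",
		       (unsigned long long)addr, (unsigned long long)size);
}
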
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index 9f51e1ea9e82..a4665f37cfc5 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -50,7 +50,7 @@ static void __init fix_hypertransport_config(int num, int slot, int func)
50static void __init via_bugs(int num, int slot, int func) 50static void __init via_bugs(int num, int slot, int func)
51{ 51{
52#ifdef CONFIG_GART_IOMMU 52#ifdef CONFIG_GART_IOMMU
53 if ((end_pfn > MAX_DMA32_PFN || force_iommu) && 53 if ((max_pfn > MAX_DMA32_PFN || force_iommu) &&
54 !gart_iommu_aperture_allowed) { 54 !gart_iommu_aperture_allowed) {
55 printk(KERN_INFO 55 printk(KERN_INFO
56 "Looks like a VIA chipset. Disabling IOMMU." 56 "Looks like a VIA chipset. Disabling IOMMU."
@@ -98,17 +98,6 @@ static void __init nvidia_bugs(int num, int slot, int func)
98 98
99} 99}
100 100
101static void __init ati_bugs(int num, int slot, int func)
102{
103#ifdef CONFIG_X86_IO_APIC
104 if (timer_over_8254 == 1) {
105 timer_over_8254 = 0;
106 printk(KERN_INFO
107 "ATI board detected. Disabling timer routing over 8254.\n");
108 }
109#endif
110}
111
112#define QFLAG_APPLY_ONCE 0x1 101#define QFLAG_APPLY_ONCE 0x1
113#define QFLAG_APPLIED 0x2 102#define QFLAG_APPLIED 0x2
114#define QFLAG_DONE (QFLAG_APPLY_ONCE|QFLAG_APPLIED) 103#define QFLAG_DONE (QFLAG_APPLY_ONCE|QFLAG_APPLIED)
@@ -126,8 +115,6 @@ static struct chipset early_qrk[] __initdata = {
126 PCI_CLASS_BRIDGE_PCI, PCI_ANY_ID, QFLAG_APPLY_ONCE, nvidia_bugs }, 115 PCI_CLASS_BRIDGE_PCI, PCI_ANY_ID, QFLAG_APPLY_ONCE, nvidia_bugs },
127 { PCI_VENDOR_ID_VIA, PCI_ANY_ID, 116 { PCI_VENDOR_ID_VIA, PCI_ANY_ID,
128 PCI_CLASS_BRIDGE_PCI, PCI_ANY_ID, QFLAG_APPLY_ONCE, via_bugs }, 117 PCI_CLASS_BRIDGE_PCI, PCI_ANY_ID, QFLAG_APPLY_ONCE, via_bugs },
129 { PCI_VENDOR_ID_ATI, PCI_ANY_ID,
130 PCI_CLASS_BRIDGE_PCI, PCI_ANY_ID, QFLAG_APPLY_ONCE, ati_bugs },
131 { PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB, 118 { PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB,
132 PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, fix_hypertransport_config }, 119 PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, fix_hypertransport_config },
133 {} 120 {}
diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c
index 77d424cf68b3..06cc8d4254b1 100644
--- a/arch/x86/kernel/efi.c
+++ b/arch/x86/kernel/efi.c
@@ -64,6 +64,17 @@ static int __init setup_noefi(char *arg)
64} 64}
65early_param("noefi", setup_noefi); 65early_param("noefi", setup_noefi);
66 66
67int add_efi_memmap;
68EXPORT_SYMBOL(add_efi_memmap);
69
70static int __init setup_add_efi_memmap(char *arg)
71{
72 add_efi_memmap = 1;
73 return 0;
74}
75early_param("add_efi_memmap", setup_add_efi_memmap);
76
77
67static efi_status_t virt_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc) 78static efi_status_t virt_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc)
68{ 79{
69 return efi_call_virt2(get_time, tm, tc); 80 return efi_call_virt2(get_time, tm, tc);
@@ -213,6 +224,50 @@ unsigned long efi_get_time(void)
213 eft.minute, eft.second); 224 eft.minute, eft.second);
214} 225}
215 226
227/*
228 * Tell the kernel about the EFI memory map. This might include
229 * more than the max 128 entries that can fit in the e820 legacy
230 * (zeropage) memory map.
231 */
232
233static void __init do_add_efi_memmap(void)
234{
235 void *p;
236
237 for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
238 efi_memory_desc_t *md = p;
239 unsigned long long start = md->phys_addr;
240 unsigned long long size = md->num_pages << EFI_PAGE_SHIFT;
241 int e820_type;
242
243 if (md->attribute & EFI_MEMORY_WB)
244 e820_type = E820_RAM;
245 else
246 e820_type = E820_RESERVED;
247 e820_add_region(start, size, e820_type);
248 }
249 sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
250}
251
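
One subtlety in do_add_efi_memmap() above: the walk advances by memmap.desc_size rather than sizeof(efi_memory_desc_t), because firmware may hand back larger descriptors than the kernel's struct. A sketch of the stride pattern (names mirror the code above):

/* never index the EFI map as a plain array of efi_memory_desc_t */
void *p;

for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
	efi_memory_desc_t *md = p;
	/* inspect md->phys_addr, md->num_pages, md->attribute ... */
}
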
252void __init efi_reserve_early(void)
253{
254 unsigned long pmap;
255
256#ifdef CONFIG_X86_32
257 pmap = boot_params.efi_info.efi_memmap;
258#else
259 pmap = (boot_params.efi_info.efi_memmap |
260 ((__u64)boot_params.efi_info.efi_memmap_hi<<32));
261#endif
262 memmap.phys_map = (void *)pmap;
263 memmap.nr_map = boot_params.efi_info.efi_memmap_size /
264 boot_params.efi_info.efi_memdesc_size;
265 memmap.desc_version = boot_params.efi_info.efi_memdesc_version;
266 memmap.desc_size = boot_params.efi_info.efi_memdesc_size;
267 reserve_early(pmap, pmap + memmap.nr_map * memmap.desc_size,
268 "EFI memmap");
269}
270
216#if EFI_DEBUG 271#if EFI_DEBUG
217static void __init print_efi_memmap(void) 272static void __init print_efi_memmap(void)
218{ 273{
@@ -244,19 +299,11 @@ void __init efi_init(void)
244 299
245#ifdef CONFIG_X86_32 300#ifdef CONFIG_X86_32
246 efi_phys.systab = (efi_system_table_t *)boot_params.efi_info.efi_systab; 301 efi_phys.systab = (efi_system_table_t *)boot_params.efi_info.efi_systab;
247 memmap.phys_map = (void *)boot_params.efi_info.efi_memmap;
248#else 302#else
249 efi_phys.systab = (efi_system_table_t *) 303 efi_phys.systab = (efi_system_table_t *)
250 (boot_params.efi_info.efi_systab | 304 (boot_params.efi_info.efi_systab |
251 ((__u64)boot_params.efi_info.efi_systab_hi<<32)); 305 ((__u64)boot_params.efi_info.efi_systab_hi<<32));
252 memmap.phys_map = (void *)
253 (boot_params.efi_info.efi_memmap |
254 ((__u64)boot_params.efi_info.efi_memmap_hi<<32));
255#endif 306#endif
256 memmap.nr_map = boot_params.efi_info.efi_memmap_size /
257 boot_params.efi_info.efi_memdesc_size;
258 memmap.desc_version = boot_params.efi_info.efi_memdesc_version;
259 memmap.desc_size = boot_params.efi_info.efi_memdesc_size;
260 307
261 efi.systab = early_ioremap((unsigned long)efi_phys.systab, 308 efi.systab = early_ioremap((unsigned long)efi_phys.systab,
262 sizeof(efi_system_table_t)); 309 sizeof(efi_system_table_t));
@@ -370,6 +417,8 @@ void __init efi_init(void)
370 if (memmap.desc_size != sizeof(efi_memory_desc_t)) 417 if (memmap.desc_size != sizeof(efi_memory_desc_t))
371 printk(KERN_WARNING "Kernel-defined memdesc" 418 printk(KERN_WARNING "Kernel-defined memdesc"
372 "doesn't match the one from EFI!\n"); 419 "doesn't match the one from EFI!\n");
420 if (add_efi_memmap)
421 do_add_efi_memmap();
373 422
374 /* Setup for EFI runtime service */ 423 /* Setup for EFI runtime service */
375 reboot_type = BOOT_EFI; 424 reboot_type = BOOT_EFI;
@@ -424,7 +473,7 @@ void __init efi_enter_virtual_mode(void)
424 size = md->num_pages << EFI_PAGE_SHIFT; 473 size = md->num_pages << EFI_PAGE_SHIFT;
425 end = md->phys_addr + size; 474 end = md->phys_addr + size;
426 475
427 if (PFN_UP(end) <= max_pfn_mapped) 476 if (PFN_UP(end) <= max_low_pfn_mapped)
428 va = __va(md->phys_addr); 477 va = __va(md->phys_addr);
429 else 478 else
430 va = efi_ioremap(md->phys_addr, size); 479 va = efi_ioremap(md->phys_addr, size);
diff --git a/arch/x86/kernel/efi_64.c b/arch/x86/kernel/efi_64.c
index d0060fdcccac..652c5287215f 100644
--- a/arch/x86/kernel/efi_64.c
+++ b/arch/x86/kernel/efi_64.c
@@ -97,13 +97,7 @@ void __init efi_call_phys_epilog(void)
97 early_runtime_code_mapping_set_exec(0); 97 early_runtime_code_mapping_set_exec(0);
98} 98}
99 99
100void __init efi_reserve_bootmem(void) 100void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size)
101{
102 reserve_bootmem_generic((unsigned long)memmap.phys_map,
103 memmap.nr_map * memmap.desc_size);
104}
105
106void __iomem * __init efi_ioremap(unsigned long phys_addr, unsigned long size)
107{ 101{
108 static unsigned pages_mapped __initdata; 102 static unsigned pages_mapped __initdata;
109 unsigned i, pages; 103 unsigned i, pages;
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index c778e4fa55a2..cfe28a715434 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -51,14 +51,14 @@
51#include <asm/percpu.h> 51#include <asm/percpu.h>
52#include <asm/dwarf2.h> 52#include <asm/dwarf2.h>
53#include <asm/processor-flags.h> 53#include <asm/processor-flags.h>
54#include "irq_vectors.h" 54#include <asm/irq_vectors.h>
55 55
56/* 56/*
57 * We use macros for low-level operations which need to be overridden 57 * We use macros for low-level operations which need to be overridden
58 * for paravirtualization. The following will never clobber any registers: 58 * for paravirtualization. The following will never clobber any registers:
59 * INTERRUPT_RETURN (aka. "iret") 59 * INTERRUPT_RETURN (aka. "iret")
60 * GET_CR0_INTO_EAX (aka. "movl %cr0, %eax") 60 * GET_CR0_INTO_EAX (aka. "movl %cr0, %eax")
61 * ENABLE_INTERRUPTS_SYSCALL_RET (aka "sti; sysexit"). 61 * ENABLE_INTERRUPTS_SYSEXIT (aka "sti; sysexit").
62 * 62 *
63 * For DISABLE_INTERRUPTS/ENABLE_INTERRUPTS (aka "cli"/"sti"), you must 63 * For DISABLE_INTERRUPTS/ENABLE_INTERRUPTS (aka "cli"/"sti"), you must
64 * specify what registers can be overwritten (CLBR_NONE, CLBR_EAX/EDX/ECX/ANY). 64 * specify what registers can be overwritten (CLBR_NONE, CLBR_EAX/EDX/ECX/ANY).
@@ -349,7 +349,7 @@ sysenter_past_esp:
349 xorl %ebp,%ebp 349 xorl %ebp,%ebp
350 TRACE_IRQS_ON 350 TRACE_IRQS_ON
3511: mov PT_FS(%esp), %fs 3511: mov PT_FS(%esp), %fs
352 ENABLE_INTERRUPTS_SYSCALL_RET 352 ENABLE_INTERRUPTS_SYSEXIT
353 CFI_ENDPROC 353 CFI_ENDPROC
354.pushsection .fixup,"ax" 354.pushsection .fixup,"ax"
3552: movl $0,PT_FS(%esp) 3552: movl $0,PT_FS(%esp)
@@ -874,10 +874,10 @@ ENTRY(native_iret)
874.previous 874.previous
875END(native_iret) 875END(native_iret)
876 876
877ENTRY(native_irq_enable_syscall_ret) 877ENTRY(native_irq_enable_sysexit)
878 sti 878 sti
879 sysexit 879 sysexit
880END(native_irq_enable_syscall_ret) 880END(native_irq_enable_sysexit)
881#endif 881#endif
882 882
883KPROBE_ENTRY(int3) 883KPROBE_ENTRY(int3)
@@ -1024,6 +1024,7 @@ ENTRY(xen_sysenter_target)
1024 RING0_INT_FRAME 1024 RING0_INT_FRAME
1025 addl $5*4, %esp /* remove xen-provided frame */ 1025 addl $5*4, %esp /* remove xen-provided frame */
1026 jmp sysenter_past_esp 1026 jmp sysenter_past_esp
1027 CFI_ENDPROC
1027 1028
1028ENTRY(xen_hypervisor_callback) 1029ENTRY(xen_hypervisor_callback)
1029 CFI_STARTPROC 1030 CFI_STARTPROC
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 556a8df522a7..bb4e22f4892f 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -59,8 +59,7 @@
59#endif 59#endif
60 60
61#ifdef CONFIG_PARAVIRT 61#ifdef CONFIG_PARAVIRT
62ENTRY(native_irq_enable_syscall_ret) 62ENTRY(native_usergs_sysret64)
63 movq %gs:pda_oldrsp,%rsp
64 swapgs 63 swapgs
65 sysretq 64 sysretq
66#endif /* CONFIG_PARAVIRT */ 65#endif /* CONFIG_PARAVIRT */
@@ -104,7 +103,7 @@ ENTRY(native_irq_enable_syscall_ret)
104 .macro FAKE_STACK_FRAME child_rip 103 .macro FAKE_STACK_FRAME child_rip
105 /* push in order ss, rsp, eflags, cs, rip */ 104 /* push in order ss, rsp, eflags, cs, rip */
106 xorl %eax, %eax 105 xorl %eax, %eax
107 pushq %rax /* ss */ 106 pushq $__KERNEL_DS /* ss */
108 CFI_ADJUST_CFA_OFFSET 8 107 CFI_ADJUST_CFA_OFFSET 8
109 /*CFI_REL_OFFSET ss,0*/ 108 /*CFI_REL_OFFSET ss,0*/
110 pushq %rax /* rsp */ 109 pushq %rax /* rsp */
@@ -169,13 +168,13 @@ ENTRY(ret_from_fork)
169 CFI_ADJUST_CFA_OFFSET -4 168 CFI_ADJUST_CFA_OFFSET -4
170 call schedule_tail 169 call schedule_tail
171 GET_THREAD_INFO(%rcx) 170 GET_THREAD_INFO(%rcx)
172 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx) 171 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%rcx)
173 jnz rff_trace 172 jnz rff_trace
174rff_action: 173rff_action:
175 RESTORE_REST 174 RESTORE_REST
176 testl $3,CS-ARGOFFSET(%rsp) # from kernel_thread? 175 testl $3,CS-ARGOFFSET(%rsp) # from kernel_thread?
177 je int_ret_from_sys_call 176 je int_ret_from_sys_call
178 testl $_TIF_IA32,threadinfo_flags(%rcx) 177 testl $_TIF_IA32,TI_flags(%rcx)
179 jnz int_ret_from_sys_call 178 jnz int_ret_from_sys_call
180 RESTORE_TOP_OF_STACK %rdi,ARGOFFSET 179 RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
181 jmp ret_from_sys_call 180 jmp ret_from_sys_call
@@ -244,7 +243,8 @@ ENTRY(system_call_after_swapgs)
244 movq %rcx,RIP-ARGOFFSET(%rsp) 243 movq %rcx,RIP-ARGOFFSET(%rsp)
245 CFI_REL_OFFSET rip,RIP-ARGOFFSET 244 CFI_REL_OFFSET rip,RIP-ARGOFFSET
246 GET_THREAD_INFO(%rcx) 245 GET_THREAD_INFO(%rcx)
247 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx) 246 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP), \
247 TI_flags(%rcx)
248 jnz tracesys 248 jnz tracesys
249 cmpq $__NR_syscall_max,%rax 249 cmpq $__NR_syscall_max,%rax
250 ja badsys 250 ja badsys
@@ -263,7 +263,7 @@ sysret_check:
263 GET_THREAD_INFO(%rcx) 263 GET_THREAD_INFO(%rcx)
264 DISABLE_INTERRUPTS(CLBR_NONE) 264 DISABLE_INTERRUPTS(CLBR_NONE)
265 TRACE_IRQS_OFF 265 TRACE_IRQS_OFF
266 movl threadinfo_flags(%rcx),%edx 266 movl TI_flags(%rcx),%edx
267 andl %edi,%edx 267 andl %edi,%edx
268 jnz sysret_careful 268 jnz sysret_careful
269 CFI_REMEMBER_STATE 269 CFI_REMEMBER_STATE
@@ -275,7 +275,8 @@ sysret_check:
275 CFI_REGISTER rip,rcx 275 CFI_REGISTER rip,rcx
276 RESTORE_ARGS 0,-ARG_SKIP,1 276 RESTORE_ARGS 0,-ARG_SKIP,1
277 /*CFI_REGISTER rflags,r11*/ 277 /*CFI_REGISTER rflags,r11*/
278 ENABLE_INTERRUPTS_SYSCALL_RET 278 movq %gs:pda_oldrsp, %rsp
279 USERGS_SYSRET64
279 280
280 CFI_RESTORE_STATE 281 CFI_RESTORE_STATE
281 /* Handle reschedules */ 282 /* Handle reschedules */
@@ -305,7 +306,7 @@ sysret_signal:
305 leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1 306 leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1
306 xorl %esi,%esi # oldset -> arg2 307 xorl %esi,%esi # oldset -> arg2
307 call ptregscall_common 308 call ptregscall_common
3081: movl $_TIF_NEED_RESCHED,%edi 3091: movl $_TIF_WORK_MASK,%edi
309 /* Use IRET because user could have changed frame. This 310 /* Use IRET because user could have changed frame. This
310 works because ptregscall_common has called FIXUP_TOP_OF_STACK. */ 311 works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
311 DISABLE_INTERRUPTS(CLBR_NONE) 312 DISABLE_INTERRUPTS(CLBR_NONE)
@@ -347,10 +348,10 @@ int_ret_from_sys_call:
347int_with_check: 348int_with_check:
348 LOCKDEP_SYS_EXIT_IRQ 349 LOCKDEP_SYS_EXIT_IRQ
349 GET_THREAD_INFO(%rcx) 350 GET_THREAD_INFO(%rcx)
350 movl threadinfo_flags(%rcx),%edx 351 movl TI_flags(%rcx),%edx
351 andl %edi,%edx 352 andl %edi,%edx
352 jnz int_careful 353 jnz int_careful
353 andl $~TS_COMPAT,threadinfo_status(%rcx) 354 andl $~TS_COMPAT,TI_status(%rcx)
354 jmp retint_swapgs 355 jmp retint_swapgs
355 356
356 /* Either reschedule or signal or syscall exit tracking needed. */ 357 /* Either reschedule or signal or syscall exit tracking needed. */
@@ -393,7 +394,7 @@ int_signal:
393 movq %rsp,%rdi # &ptregs -> arg1 394 movq %rsp,%rdi # &ptregs -> arg1
394 xorl %esi,%esi # oldset -> arg2 395 xorl %esi,%esi # oldset -> arg2
395 call do_notify_resume 396 call do_notify_resume
3961: movl $_TIF_NEED_RESCHED,%edi 3971: movl $_TIF_WORK_MASK,%edi
397int_restore_rest: 398int_restore_rest:
398 RESTORE_REST 399 RESTORE_REST
399 DISABLE_INTERRUPTS(CLBR_NONE) 400 DISABLE_INTERRUPTS(CLBR_NONE)
@@ -420,7 +421,6 @@ END(\label)
420 PTREGSCALL stub_clone, sys_clone, %r8 421 PTREGSCALL stub_clone, sys_clone, %r8
421 PTREGSCALL stub_fork, sys_fork, %rdi 422 PTREGSCALL stub_fork, sys_fork, %rdi
422 PTREGSCALL stub_vfork, sys_vfork, %rdi 423 PTREGSCALL stub_vfork, sys_vfork, %rdi
423 PTREGSCALL stub_rt_sigsuspend, sys_rt_sigsuspend, %rdx
424 PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx 424 PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
425 PTREGSCALL stub_iopl, sys_iopl, %rsi 425 PTREGSCALL stub_iopl, sys_iopl, %rsi
426 426
@@ -559,7 +559,7 @@ retint_with_reschedule:
559 movl $_TIF_WORK_MASK,%edi 559 movl $_TIF_WORK_MASK,%edi
560retint_check: 560retint_check:
561 LOCKDEP_SYS_EXIT_IRQ 561 LOCKDEP_SYS_EXIT_IRQ
562 movl threadinfo_flags(%rcx),%edx 562 movl TI_flags(%rcx),%edx
563 andl %edi,%edx 563 andl %edi,%edx
564 CFI_REMEMBER_STATE 564 CFI_REMEMBER_STATE
565 jnz retint_careful 565 jnz retint_careful
@@ -647,17 +647,16 @@ retint_signal:
647 RESTORE_REST 647 RESTORE_REST
648 DISABLE_INTERRUPTS(CLBR_NONE) 648 DISABLE_INTERRUPTS(CLBR_NONE)
649 TRACE_IRQS_OFF 649 TRACE_IRQS_OFF
650 movl $_TIF_NEED_RESCHED,%edi
651 GET_THREAD_INFO(%rcx) 650 GET_THREAD_INFO(%rcx)
652 jmp retint_check 651 jmp retint_with_reschedule
653 652
654#ifdef CONFIG_PREEMPT 653#ifdef CONFIG_PREEMPT
655 /* Returning to kernel space. Check if we need preemption */ 654 /* Returning to kernel space. Check if we need preemption */
656 /* rcx: threadinfo. interrupts off. */ 655 /* rcx: threadinfo. interrupts off. */
657ENTRY(retint_kernel) 656ENTRY(retint_kernel)
658 cmpl $0,threadinfo_preempt_count(%rcx) 657 cmpl $0,TI_preempt_count(%rcx)
659 jnz retint_restore_args 658 jnz retint_restore_args
660 bt $TIF_NEED_RESCHED,threadinfo_flags(%rcx) 659 bt $TIF_NEED_RESCHED,TI_flags(%rcx)
661 jnc retint_restore_args 660 jnc retint_restore_args
662 bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */ 661 bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */
663 jnc retint_restore_args 662 jnc retint_restore_args
@@ -720,6 +719,10 @@ ENTRY(apic_timer_interrupt)
720 apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt 719 apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
721END(apic_timer_interrupt) 720END(apic_timer_interrupt)
722 721
722ENTRY(uv_bau_message_intr1)
723 apicinterrupt 220,uv_bau_message_interrupt
724END(uv_bau_message_intr1)
725
723ENTRY(error_interrupt) 726ENTRY(error_interrupt)
724 apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt 727 apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt
725END(error_interrupt) 728END(error_interrupt)
@@ -733,6 +736,7 @@ END(spurious_interrupt)
733 */ 736 */
734 .macro zeroentry sym 737 .macro zeroentry sym
735 INTR_FRAME 738 INTR_FRAME
739 PARAVIRT_ADJUST_EXCEPTION_FRAME
736 pushq $0 /* push error code/oldrax */ 740 pushq $0 /* push error code/oldrax */
737 CFI_ADJUST_CFA_OFFSET 8 741 CFI_ADJUST_CFA_OFFSET 8
738 pushq %rax /* push real oldrax to the rdi slot */ 742 pushq %rax /* push real oldrax to the rdi slot */
@@ -745,6 +749,7 @@ END(spurious_interrupt)
745 749
746 .macro errorentry sym 750 .macro errorentry sym
747 XCPT_FRAME 751 XCPT_FRAME
752 PARAVIRT_ADJUST_EXCEPTION_FRAME
748 pushq %rax 753 pushq %rax
749 CFI_ADJUST_CFA_OFFSET 8 754 CFI_ADJUST_CFA_OFFSET 8
750 CFI_REL_OFFSET rax,0 755 CFI_REL_OFFSET rax,0
@@ -814,7 +819,7 @@ paranoid_restore\trace:
814 jmp irq_return 819 jmp irq_return
815paranoid_userspace\trace: 820paranoid_userspace\trace:
816 GET_THREAD_INFO(%rcx) 821 GET_THREAD_INFO(%rcx)
817 movl threadinfo_flags(%rcx),%ebx 822 movl TI_flags(%rcx),%ebx
818 andl $_TIF_WORK_MASK,%ebx 823 andl $_TIF_WORK_MASK,%ebx
819 jz paranoid_swapgs\trace 824 jz paranoid_swapgs\trace
820 movq %rsp,%rdi /* &pt_regs */ 825 movq %rsp,%rdi /* &pt_regs */
@@ -912,7 +917,7 @@ error_exit:
912 testl %eax,%eax 917 testl %eax,%eax
913 jne retint_kernel 918 jne retint_kernel
914 LOCKDEP_SYS_EXIT_IRQ 919 LOCKDEP_SYS_EXIT_IRQ
915 movl threadinfo_flags(%rcx),%edx 920 movl TI_flags(%rcx),%edx
916 movl $_TIF_WORK_MASK,%edi 921 movl $_TIF_WORK_MASK,%edi
917 andl %edi,%edx 922 andl %edi,%edx
918 jnz retint_careful 923 jnz retint_careful
@@ -926,11 +931,11 @@ error_kernelspace:
926 iret run with kernel gs again, so don't set the user space flag. 931 iret run with kernel gs again, so don't set the user space flag.
927 B stepping K8s sometimes report a truncated RIP for IRET 932 B stepping K8s sometimes report a truncated RIP for IRET
928 exceptions returning to compat mode. Check for these here too. */ 933 exceptions returning to compat mode. Check for these here too. */
929 leaq irq_return(%rip),%rbp 934 leaq irq_return(%rip),%rcx
930 cmpq %rbp,RIP(%rsp) 935 cmpq %rcx,RIP(%rsp)
931 je error_swapgs 936 je error_swapgs
932 movl %ebp,%ebp /* zero extend */ 937 movl %ecx,%ecx /* zero extend */
933 cmpq %rbp,RIP(%rsp) 938 cmpq %rcx,RIP(%rsp)
934 je error_swapgs 939 je error_swapgs
935 cmpq $gs_change,RIP(%rsp) 940 cmpq $gs_change,RIP(%rsp)
936 je error_swapgs 941 je error_swapgs
@@ -939,7 +944,7 @@ KPROBE_END(error_entry)
939 944
940 /* Reload gs selector with exception handling */ 945 /* Reload gs selector with exception handling */
941 /* edi: new selector */ 946 /* edi: new selector */
942ENTRY(load_gs_index) 947ENTRY(native_load_gs_index)
943 CFI_STARTPROC 948 CFI_STARTPROC
944 pushf 949 pushf
945 CFI_ADJUST_CFA_OFFSET 8 950 CFI_ADJUST_CFA_OFFSET 8
@@ -953,7 +958,7 @@ gs_change:
953 CFI_ADJUST_CFA_OFFSET -8 958 CFI_ADJUST_CFA_OFFSET -8
954 ret 959 ret
955 CFI_ENDPROC 960 CFI_ENDPROC
956ENDPROC(load_gs_index) 961ENDPROC(native_load_gs_index)
957 962
958 .section __ex_table,"a" 963 .section __ex_table,"a"
959 .align 8 964 .align 8
@@ -1120,10 +1125,6 @@ ENTRY(coprocessor_segment_overrun)
1120 zeroentry do_coprocessor_segment_overrun 1125 zeroentry do_coprocessor_segment_overrun
1121END(coprocessor_segment_overrun) 1126END(coprocessor_segment_overrun)
1122 1127
1123ENTRY(reserved)
1124 zeroentry do_reserved
1125END(reserved)
1126
1127 /* runs on exception stack */ 1128 /* runs on exception stack */
1128ENTRY(double_fault) 1129ENTRY(double_fault)
1129 XCPT_FRAME 1130 XCPT_FRAME
diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c
index cbaaf69bedb2..1fa8be5bd217 100644
--- a/arch/x86/kernel/genapic_64.c
+++ b/arch/x86/kernel/genapic_64.c
@@ -51,7 +51,7 @@ void __init setup_apic_routing(void)
51 else 51 else
52#endif 52#endif
53 53
54 if (num_possible_cpus() <= 8) 54 if (max_physical_apicid < 8)
55 genapic = &apic_flat; 55 genapic = &apic_flat;
56 else 56 else
57 genapic = &apic_physflat; 57 genapic = &apic_physflat;
diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c
index ebf13908a743..711f11c30b06 100644
--- a/arch/x86/kernel/genx2apic_uv_x.c
+++ b/arch/x86/kernel/genx2apic_uv_x.c
@@ -5,9 +5,10 @@
5 * 5 *
6 * SGI UV APIC functions (note: not an Intel compatible APIC) 6 * SGI UV APIC functions (note: not an Intel compatible APIC)
7 * 7 *
8 * Copyright (C) 2007 Silicon Graphics, Inc. All rights reserved. 8 * Copyright (C) 2007-2008 Silicon Graphics, Inc. All rights reserved.
9 */ 9 */
10 10
11#include <linux/kernel.h>
11#include <linux/threads.h> 12#include <linux/threads.h>
12#include <linux/cpumask.h> 13#include <linux/cpumask.h>
13#include <linux/string.h> 14#include <linux/string.h>
@@ -20,6 +21,7 @@
20#include <asm/smp.h> 21#include <asm/smp.h>
21#include <asm/ipi.h> 22#include <asm/ipi.h>
22#include <asm/genapic.h> 23#include <asm/genapic.h>
24#include <asm/pgtable.h>
23#include <asm/uv/uv_mmrs.h> 25#include <asm/uv/uv_mmrs.h>
24#include <asm/uv/uv_hub.h> 26#include <asm/uv/uv_hub.h>
25 27
@@ -55,37 +57,37 @@ static cpumask_t uv_vector_allocation_domain(int cpu)
55int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip) 57int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip)
56{ 58{
57 unsigned long val; 59 unsigned long val;
58 int nasid; 60 int pnode;
59 61
60 nasid = uv_apicid_to_nasid(phys_apicid); 62 pnode = uv_apicid_to_pnode(phys_apicid);
61 val = (1UL << UVH_IPI_INT_SEND_SHFT) | 63 val = (1UL << UVH_IPI_INT_SEND_SHFT) |
62 (phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) | 64 (phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) |
63 (((long)start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) | 65 (((long)start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) |
64 APIC_DM_INIT; 66 APIC_DM_INIT;
65 uv_write_global_mmr64(nasid, UVH_IPI_INT, val); 67 uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
66 mdelay(10); 68 mdelay(10);
67 69
68 val = (1UL << UVH_IPI_INT_SEND_SHFT) | 70 val = (1UL << UVH_IPI_INT_SEND_SHFT) |
69 (phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) | 71 (phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) |
70 (((long)start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) | 72 (((long)start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) |
71 APIC_DM_STARTUP; 73 APIC_DM_STARTUP;
72 uv_write_global_mmr64(nasid, UVH_IPI_INT, val); 74 uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
73 return 0; 75 return 0;
74} 76}
75 77
76static void uv_send_IPI_one(int cpu, int vector) 78static void uv_send_IPI_one(int cpu, int vector)
77{ 79{
78 unsigned long val, apicid, lapicid; 80 unsigned long val, apicid, lapicid;
79 int nasid; 81 int pnode;
80 82
81 apicid = per_cpu(x86_cpu_to_apicid, cpu); /* ZZZ - cache node-local ? */ 83 apicid = per_cpu(x86_cpu_to_apicid, cpu); /* ZZZ - cache node-local ? */
82 lapicid = apicid & 0x3f; /* ZZZ macro needed */ 84 lapicid = apicid & 0x3f; /* ZZZ macro needed */
83 nasid = uv_apicid_to_nasid(apicid); 85 pnode = uv_apicid_to_pnode(apicid);
84 val = 86 val =
85 (1UL << UVH_IPI_INT_SEND_SHFT) | (lapicid << 87 (1UL << UVH_IPI_INT_SEND_SHFT) | (lapicid <<
86 UVH_IPI_INT_APIC_ID_SHFT) | 88 UVH_IPI_INT_APIC_ID_SHFT) |
87 (vector << UVH_IPI_INT_VECTOR_SHFT); 89 (vector << UVH_IPI_INT_VECTOR_SHFT);
88 uv_write_global_mmr64(nasid, UVH_IPI_INT, val); 90 uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
89} 91}
90 92
91static void uv_send_IPI_mask(cpumask_t mask, int vector) 93static void uv_send_IPI_mask(cpumask_t mask, int vector)
@@ -159,39 +161,146 @@ struct genapic apic_x2apic_uv_x = {
159 .phys_pkg_id = phys_pkg_id, /* Fixme ZZZ */ 161 .phys_pkg_id = phys_pkg_id, /* Fixme ZZZ */
160}; 162};
161 163
162static __cpuinit void set_x2apic_extra_bits(int nasid) 164static __cpuinit void set_x2apic_extra_bits(int pnode)
163{ 165{
164 __get_cpu_var(x2apic_extra_bits) = ((nasid >> 1) << 6); 166 __get_cpu_var(x2apic_extra_bits) = (pnode << 6);
165} 167}
166 168
167/* 169/*
168 * Called on boot cpu. 170 * Called on boot cpu.
169 */ 171 */
172static __init int boot_pnode_to_blade(int pnode)
173{
174 int blade;
175
176 for (blade = 0; blade < uv_num_possible_blades(); blade++)
177 if (pnode == uv_blade_info[blade].pnode)
178 return blade;
179 BUG();
180}
181
182struct redir_addr {
183 unsigned long redirect;
184 unsigned long alias;
185};
186
187#define DEST_SHIFT UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT
188
189static __initdata struct redir_addr redir_addrs[] = {
190 {UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR, UVH_SI_ALIAS0_OVERLAY_CONFIG},
191 {UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR, UVH_SI_ALIAS1_OVERLAY_CONFIG},
192 {UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR, UVH_SI_ALIAS2_OVERLAY_CONFIG},
193};
194
195static __init void get_lowmem_redirect(unsigned long *base, unsigned long *size)
196{
197 union uvh_si_alias0_overlay_config_u alias;
198 union uvh_rh_gam_alias210_redirect_config_2_mmr_u redirect;
199 int i;
200
201 for (i = 0; i < ARRAY_SIZE(redir_addrs); i++) {
202 alias.v = uv_read_local_mmr(redir_addrs[i].alias);
203 if (alias.s.base == 0) {
204 *size = (1UL << alias.s.m_alias);
205 redirect.v = uv_read_local_mmr(redir_addrs[i].redirect);
206 *base = (unsigned long)redirect.s.dest_base << DEST_SHIFT;
207 return;
208 }
209 }
210 BUG();
211}
212
213static __init void map_low_mmrs(void)
214{
215 init_extra_mapping_uc(UV_GLOBAL_MMR32_BASE, UV_GLOBAL_MMR32_SIZE);
216 init_extra_mapping_uc(UV_LOCAL_MMR_BASE, UV_LOCAL_MMR_SIZE);
217}
218
219enum map_type {map_wb, map_uc};
220
221static void map_high(char *id, unsigned long base, int shift, enum map_type map_type)
222{
223 unsigned long bytes, paddr;
224
225 paddr = base << shift;
226 bytes = (1UL << shift);
227 printk(KERN_INFO "UV: Map %s_HI 0x%lx - 0x%lx\n", id, paddr,
228 paddr + bytes);
229 if (map_type == map_uc)
230 init_extra_mapping_uc(paddr, bytes);
231 else
232 init_extra_mapping_wb(paddr, bytes);
233
234}
235static __init void map_gru_high(int max_pnode)
236{
237 union uvh_rh_gam_gru_overlay_config_mmr_u gru;
238 int shift = UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT;
239
240 gru.v = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR);
241 if (gru.s.enable)
242 map_high("GRU", gru.s.base, shift, map_wb);
243}
244
245static __init void map_config_high(int max_pnode)
246{
247 union uvh_rh_gam_cfg_overlay_config_mmr_u cfg;
248 int shift = UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR_BASE_SHFT;
249
250 cfg.v = uv_read_local_mmr(UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR);
251 if (cfg.s.enable)
252 map_high("CONFIG", cfg.s.base, shift, map_uc);
253}
254
255static __init void map_mmr_high(int max_pnode)
256{
257 union uvh_rh_gam_mmr_overlay_config_mmr_u mmr;
258 int shift = UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT;
259
260 mmr.v = uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR);
261 if (mmr.s.enable)
262 map_high("MMR", mmr.s.base, shift, map_uc);
263}
264
265static __init void map_mmioh_high(int max_pnode)
266{
267 union uvh_rh_gam_mmioh_overlay_config_mmr_u mmioh;
268 int shift = UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT;
269
270 mmioh.v = uv_read_local_mmr(UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR);
271 if (mmioh.s.enable)
272 map_high("MMIOH", mmioh.s.base, shift, map_uc);
273}
274
170static __init void uv_system_init(void) 275static __init void uv_system_init(void)
171{ 276{
172 union uvh_si_addr_map_config_u m_n_config; 277 union uvh_si_addr_map_config_u m_n_config;
173 int bytes, nid, cpu, lcpu, nasid, last_nasid, blade; 278 union uvh_node_id_u node_id;
174 unsigned long mmr_base; 279 unsigned long gnode_upper, lowmem_redir_base, lowmem_redir_size;
280 int bytes, nid, cpu, lcpu, pnode, blade, i, j, m_val, n_val;
281 int max_pnode = 0;
282 unsigned long mmr_base, present;
283
284 map_low_mmrs();
175 285
176 m_n_config.v = uv_read_local_mmr(UVH_SI_ADDR_MAP_CONFIG); 286 m_n_config.v = uv_read_local_mmr(UVH_SI_ADDR_MAP_CONFIG);
287 m_val = m_n_config.s.m_skt;
288 n_val = m_n_config.s.n_skt;
177 mmr_base = 289 mmr_base =
178 uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR) & 290 uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR) &
179 ~UV_MMR_ENABLE; 291 ~UV_MMR_ENABLE;
180 printk(KERN_DEBUG "UV: global MMR base 0x%lx\n", mmr_base); 292 printk(KERN_DEBUG "UV: global MMR base 0x%lx\n", mmr_base);
181 293
 182 last_nasid = -1; 294 for (i = 0; i < UVH_NODE_PRESENT_TABLE_DEPTH; i++)
183 for_each_possible_cpu(cpu) { 295 uv_possible_blades +=
 184 nid = cpu_to_node(cpu); 296 hweight64(uv_read_local_mmr(UVH_NODE_PRESENT_TABLE + i * 8));
185 nasid = uv_apicid_to_nasid(per_cpu(x86_cpu_to_apicid, cpu));
186 if (nasid != last_nasid)
187 uv_possible_blades++;
188 last_nasid = nasid;
189 }
190 printk(KERN_DEBUG "UV: Found %d blades\n", uv_num_possible_blades()); 297 printk(KERN_DEBUG "UV: Found %d blades\n", uv_num_possible_blades());
191 298
192 bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades(); 299 bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades();
193 uv_blade_info = alloc_bootmem_pages(bytes); 300 uv_blade_info = alloc_bootmem_pages(bytes);
194 301
302 get_lowmem_redirect(&lowmem_redir_base, &lowmem_redir_size);
303
195 bytes = sizeof(uv_node_to_blade[0]) * num_possible_nodes(); 304 bytes = sizeof(uv_node_to_blade[0]) * num_possible_nodes();
196 uv_node_to_blade = alloc_bootmem_pages(bytes); 305 uv_node_to_blade = alloc_bootmem_pages(bytes);
197 memset(uv_node_to_blade, 255, bytes); 306 memset(uv_node_to_blade, 255, bytes);
@@ -200,43 +309,62 @@ static __init void uv_system_init(void)
200 uv_cpu_to_blade = alloc_bootmem_pages(bytes); 309 uv_cpu_to_blade = alloc_bootmem_pages(bytes);
201 memset(uv_cpu_to_blade, 255, bytes); 310 memset(uv_cpu_to_blade, 255, bytes);
202 311
203 last_nasid = -1; 312 blade = 0;
204 blade = -1; 313 for (i = 0; i < UVH_NODE_PRESENT_TABLE_DEPTH; i++) {
205 lcpu = -1; 314 present = uv_read_local_mmr(UVH_NODE_PRESENT_TABLE + i * 8);
206 for_each_possible_cpu(cpu) { 315 for (j = 0; j < 64; j++) {
207 nid = cpu_to_node(cpu); 316 if (!test_bit(j, &present))
208 nasid = uv_apicid_to_nasid(per_cpu(x86_cpu_to_apicid, cpu)); 317 continue;
209 if (nasid != last_nasid) { 318 uv_blade_info[blade].pnode = (i * 64 + j);
210 blade++; 319 uv_blade_info[blade].nr_possible_cpus = 0;
211 lcpu = -1;
212 uv_blade_info[blade].nr_posible_cpus = 0;
213 uv_blade_info[blade].nr_online_cpus = 0; 320 uv_blade_info[blade].nr_online_cpus = 0;
321 blade++;
214 } 322 }
215 last_nasid = nasid; 323 }
216 lcpu++;
217 324
218 uv_cpu_hub_info(cpu)->m_val = m_n_config.s.m_skt; 325 node_id.v = uv_read_local_mmr(UVH_NODE_ID);
219 uv_cpu_hub_info(cpu)->n_val = m_n_config.s.n_skt; 326 gnode_upper = (((unsigned long)node_id.s.node_id) &
327 ~((1 << n_val) - 1)) << m_val;
328
329 for_each_present_cpu(cpu) {
330 nid = cpu_to_node(cpu);
331 pnode = uv_apicid_to_pnode(per_cpu(x86_cpu_to_apicid, cpu));
332 blade = boot_pnode_to_blade(pnode);
333 lcpu = uv_blade_info[blade].nr_possible_cpus;
334 uv_blade_info[blade].nr_possible_cpus++;
335
336 uv_cpu_hub_info(cpu)->lowmem_remap_base = lowmem_redir_base;
337 uv_cpu_hub_info(cpu)->lowmem_remap_top =
338 lowmem_redir_base + lowmem_redir_size;
339 uv_cpu_hub_info(cpu)->m_val = m_val;
 340 uv_cpu_hub_info(cpu)->n_val = n_val;
220 uv_cpu_hub_info(cpu)->numa_blade_id = blade; 341 uv_cpu_hub_info(cpu)->numa_blade_id = blade;
221 uv_cpu_hub_info(cpu)->blade_processor_id = lcpu; 342 uv_cpu_hub_info(cpu)->blade_processor_id = lcpu;
222 uv_cpu_hub_info(cpu)->local_nasid = nasid; 343 uv_cpu_hub_info(cpu)->pnode = pnode;
223 uv_cpu_hub_info(cpu)->gnode_upper = 344 uv_cpu_hub_info(cpu)->pnode_mask = (1 << n_val) - 1;
224 nasid & ~((1 << uv_hub_info->n_val) - 1); 345 uv_cpu_hub_info(cpu)->gpa_mask = (1 << (m_val + n_val)) - 1;
346 uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper;
225 uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base; 347 uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base;
226 uv_cpu_hub_info(cpu)->coherency_domain_number = 0;/* ZZZ */ 348 uv_cpu_hub_info(cpu)->coherency_domain_number = 0;/* ZZZ */
227 uv_blade_info[blade].nasid = nasid;
228 uv_blade_info[blade].nr_posible_cpus++;
229 uv_node_to_blade[nid] = blade; 349 uv_node_to_blade[nid] = blade;
230 uv_cpu_to_blade[cpu] = blade; 350 uv_cpu_to_blade[cpu] = blade;
351 max_pnode = max(pnode, max_pnode);
231 352
232 printk(KERN_DEBUG "UV cpu %d, apicid 0x%x, nasid %d, nid %d\n", 353 printk(KERN_DEBUG "UV: cpu %d, apicid 0x%x, pnode %d, nid %d, "
233 cpu, per_cpu(x86_cpu_to_apicid, cpu), nasid, nid); 354 "lcpu %d, blade %d\n",
234 printk(KERN_DEBUG "UV lcpu %d, blade %d\n", lcpu, blade); 355 cpu, per_cpu(x86_cpu_to_apicid, cpu), pnode, nid,
356 lcpu, blade);
235 } 357 }
358
359 map_gru_high(max_pnode);
360 map_mmr_high(max_pnode);
361 map_config_high(max_pnode);
362 map_mmioh_high(max_pnode);
236} 363}
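gnode_upper caches the node-id bits that sit above the pnode field, so a global address can later be assembled without re-reading UVH_NODE_ID. A sketch of that assembly, assuming the usual UV layout (node offset in the low m_val bits, pnode above it) and reusing the per-cpu fields initialized above:

	/* Sketch: build a UV global address from a pnode and an offset. */
	static unsigned long uv_global_addr(int pnode, unsigned long offset)
	{
		return uv_hub_info->gnode_upper |
		       ((unsigned long)pnode << uv_hub_info->m_val) |
		       (offset & ((1UL << uv_hub_info->m_val) - 1));
	}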
237 364
238/* 365/*
239 * Called on each cpu to initialize the per_cpu UV data area. 366 * Called on each cpu to initialize the per_cpu UV data area.
367 * ZZZ hotplug not supported yet
240 */ 368 */
241void __cpuinit uv_cpu_init(void) 369void __cpuinit uv_cpu_init(void)
242{ 370{
@@ -246,5 +374,5 @@ void __cpuinit uv_cpu_init(void)
246 uv_blade_info[uv_numa_blade_id()].nr_online_cpus++; 374 uv_blade_info[uv_numa_blade_id()].nr_online_cpus++;
247 375
248 if (get_uv_system_type() == UV_NON_UNIQUE_APIC) 376 if (get_uv_system_type() == UV_NON_UNIQUE_APIC)
249 set_x2apic_extra_bits(uv_hub_info->local_nasid); 377 set_x2apic_extra_bits(uv_hub_info->pnode);
250} 378}
diff --git a/arch/x86/kernel/head.c b/arch/x86/kernel/head.c
new file mode 100644
index 000000000000..3e66bd364a9d
--- /dev/null
+++ b/arch/x86/kernel/head.c
@@ -0,0 +1,55 @@
1#include <linux/kernel.h>
2#include <linux/init.h>
3
4#include <asm/setup.h>
5#include <asm/bios_ebda.h>
6
7#define BIOS_LOWMEM_KILOBYTES 0x413
8
9/*
10 * The BIOS places the EBDA/XBDA at the top of conventional
11 * memory, and usually decreases the reported amount of
12 * conventional memory (int 0x12) too. This also contains a
13 * workaround for Dell systems that neglect to reserve EBDA.
14 * The same workaround also avoids a problem with the AMD768MPX
15 * chipset: reserve a page before VGA to prevent PCI prefetch
16 * into it (errata #56). Usually the page is reserved anyways,
17 * unless you have no PS/2 mouse plugged in.
18 */
19void __init reserve_ebda_region(void)
20{
21 unsigned int lowmem, ebda_addr;
22
23 /* To determine the position of the EBDA and the */
24 /* end of conventional memory, we need to look at */
25 /* the BIOS data area. In a paravirtual environment */
26 /* that area is absent. We'll just have to assume */
27 /* that the paravirt case can handle memory setup */
28 /* correctly, without our help. */
29 if (paravirt_enabled())
30 return;
31
32 /* end of low (conventional) memory */
33 lowmem = *(unsigned short *)__va(BIOS_LOWMEM_KILOBYTES);
34 lowmem <<= 10;
35
36 /* start of EBDA area */
37 ebda_addr = get_bios_ebda();
38
39 /* Fixup: bios puts an EBDA in the top 64K segment */
40 /* of conventional memory, but does not adjust lowmem. */
41 if ((lowmem - ebda_addr) <= 0x10000)
42 lowmem = ebda_addr;
43
44 /* Fixup: bios does not report an EBDA at all. */
45 /* Some old Dells seem to need 4k anyhow (bugzilla 2990) */
46 if ((ebda_addr == 0) && (lowmem >= 0x9f000))
47 lowmem = 0x9f000;
48
49 /* Paranoia: should never happen, but... */
50 if ((lowmem == 0) || (lowmem >= 0x100000))
51 lowmem = 0x9f000;
52
53 /* reserve all memory between lowmem and the 1MB mark */
54 reserve_early_overlap_ok(lowmem, 0x100000, "BIOS reserved");
55}
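A worked example of the fixups above, for a typical 640K machine (all values illustrative):

	/*   *(u16 *)__va(0x413) == 639     -> lowmem = 639 << 10 = 0x9fc00
	 *   get_bios_ebda()     == 0x9f800 -> EBDA in the top 64K segment
	 *   lowmem - ebda_addr  == 0x400 <= 0x10000, so lowmem = 0x9f800
	 * and [0x9f800, 0x100000) is early-reserved as "BIOS reserved". */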
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index 3db059058927..fa1d25dd83e3 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -8,7 +8,34 @@
8#include <linux/init.h> 8#include <linux/init.h>
9#include <linux/start_kernel.h> 9#include <linux/start_kernel.h>
10 10
11#include <asm/setup.h>
12#include <asm/sections.h>
13#include <asm/e820.h>
14#include <asm/bios_ebda.h>
15
11void __init i386_start_kernel(void) 16void __init i386_start_kernel(void)
12{ 17{
18 reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS");
19
20#ifdef CONFIG_BLK_DEV_INITRD
21 /* Reserve INITRD */
22 if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) {
23 u64 ramdisk_image = boot_params.hdr.ramdisk_image;
24 u64 ramdisk_size = boot_params.hdr.ramdisk_size;
25 u64 ramdisk_end = ramdisk_image + ramdisk_size;
26 reserve_early(ramdisk_image, ramdisk_end, "RAMDISK");
27 }
28#endif
29 reserve_early(init_pg_tables_start, init_pg_tables_end,
30 "INIT_PG_TABLE");
31
32 reserve_ebda_region();
33
34 /*
35 * At this point everything still needed from the boot loader
36 * or BIOS or kernel text should be early reserved or marked not
37 * RAM in e820. All other memory is free game.
38 */
39
13 start_kernel(); 40 start_kernel();
14} 41}
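The initrd window reserved above is simply [image, image + size); for example (addresses illustrative, the real ones come from the boot_params header at runtime):

	/* ramdisk_image = 0x37c00000, ramdisk_size = 0x400000
	 * -> reserve_early(0x37c00000, 0x38000000, "RAMDISK"); */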
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index e25c57b8aa84..c97819829146 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -25,6 +25,20 @@
25#include <asm/e820.h> 25#include <asm/e820.h>
26#include <asm/bios_ebda.h> 26#include <asm/bios_ebda.h>
27 27
28/* boot cpu pda */
29static struct x8664_pda _boot_cpu_pda __read_mostly;
30
31#ifdef CONFIG_SMP
32/*
33 * We install an empty cpu_pda pointer table to indicate to early users
34 * (numa_set_node) that the cpu_pda pointer table for cpus other than
35 * the boot cpu is not yet setup.
36 */
37static struct x8664_pda *__cpu_pda[NR_CPUS] __initdata;
38#else
39static struct x8664_pda *__cpu_pda[NR_CPUS] __read_mostly;
40#endif
41
28static void __init zap_identity_mappings(void) 42static void __init zap_identity_mappings(void)
29{ 43{
30 pgd_t *pgd = pgd_offset_k(0UL); 44 pgd_t *pgd = pgd_offset_k(0UL);
@@ -51,74 +65,6 @@ static void __init copy_bootdata(char *real_mode_data)
51 } 65 }
52} 66}
53 67
54#define BIOS_LOWMEM_KILOBYTES 0x413
55
56/*
57 * The BIOS places the EBDA/XBDA at the top of conventional
58 * memory, and usually decreases the reported amount of
59 * conventional memory (int 0x12) too. This also contains a
60 * workaround for Dell systems that neglect to reserve EBDA.
61 * The same workaround also avoids a problem with the AMD768MPX
62 * chipset: reserve a page before VGA to prevent PCI prefetch
63 * into it (errata #56). Usually the page is reserved anyways,
64 * unless you have no PS/2 mouse plugged in.
65 */
66static void __init reserve_ebda_region(void)
67{
68 unsigned int lowmem, ebda_addr;
69
70 /* To determine the position of the EBDA and the */
71 /* end of conventional memory, we need to look at */
72 /* the BIOS data area. In a paravirtual environment */
73 /* that area is absent. We'll just have to assume */
74 /* that the paravirt case can handle memory setup */
75 /* correctly, without our help. */
76 if (paravirt_enabled())
77 return;
78
79 /* end of low (conventional) memory */
80 lowmem = *(unsigned short *)__va(BIOS_LOWMEM_KILOBYTES);
81 lowmem <<= 10;
82
83 /* start of EBDA area */
84 ebda_addr = get_bios_ebda();
85
86 /* Fixup: bios puts an EBDA in the top 64K segment */
87 /* of conventional memory, but does not adjust lowmem. */
88 if ((lowmem - ebda_addr) <= 0x10000)
89 lowmem = ebda_addr;
90
91 /* Fixup: bios does not report an EBDA at all. */
92 /* Some old Dells seem to need 4k anyhow (bugzilla 2990) */
93 if ((ebda_addr == 0) && (lowmem >= 0x9f000))
94 lowmem = 0x9f000;
95
96 /* Paranoia: should never happen, but... */
97 if ((lowmem == 0) || (lowmem >= 0x100000))
98 lowmem = 0x9f000;
99
100 /* reserve all memory between lowmem and the 1MB mark */
101 reserve_early(lowmem, 0x100000, "BIOS reserved");
102}
103
104static void __init reserve_setup_data(void)
105{
106 struct setup_data *data;
107 unsigned long pa_data;
108 char buf[32];
109
110 if (boot_params.hdr.version < 0x0209)
111 return;
112 pa_data = boot_params.hdr.setup_data;
113 while (pa_data) {
114 data = early_ioremap(pa_data, sizeof(*data));
115 sprintf(buf, "setup data %x", data->type);
116 reserve_early(pa_data, pa_data+sizeof(*data)+data->len, buf);
117 pa_data = data->next;
118 early_iounmap(data, sizeof(*data));
119 }
120}
121
122void __init x86_64_start_kernel(char * real_mode_data) 68void __init x86_64_start_kernel(char * real_mode_data)
123{ 69{
124 int i; 70 int i;
@@ -156,10 +102,17 @@ void __init x86_64_start_kernel(char * real_mode_data)
156 102
157 early_printk("Kernel alive\n"); 103 early_printk("Kernel alive\n");
158 104
159 for (i = 0; i < NR_CPUS; i++) 105 _cpu_pda = __cpu_pda;
160 cpu_pda(i) = &boot_cpu_pda[i]; 106 cpu_pda(0) = &_boot_cpu_pda;
161
162 pda_init(0); 107 pda_init(0);
108
109 early_printk("Kernel really alive\n");
110
111 x86_64_start_reservations(real_mode_data);
112}
113
114void __init x86_64_start_reservations(char *real_mode_data)
115{
163 copy_bootdata(__va(real_mode_data)); 116 copy_bootdata(__va(real_mode_data));
164 117
165 reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS"); 118 reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS");
@@ -175,7 +128,6 @@ void __init x86_64_start_kernel(char * real_mode_data)
175#endif 128#endif
176 129
177 reserve_ebda_region(); 130 reserve_ebda_region();
178 reserve_setup_data();
179 131
180 /* 132 /*
181 * At this point everything still needed from the boot loader 133 * At this point everything still needed from the boot loader
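With the __initdata pointer table installed, only cpu_pda(0) is non-NULL until the real per-cpu areas exist, which is exactly what early users such as numa_set_node can test for. A sketch of such a caller (the pda field name, nodenumber, is taken from the x8664_pda of this era; treat it as illustrative):

	void early_set_node(int cpu, int node)
	{
		if (cpu_pda(cpu))	/* NULL for APs this early */
			cpu_pda(cpu)->nodenumber = node;
	}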
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index f7357cc0162c..f67e93441caf 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -194,6 +194,7 @@ default_entry:
194 xorl %ebx,%ebx /* %ebx is kept at zero */ 194 xorl %ebx,%ebx /* %ebx is kept at zero */
195 195
196 movl $pa(pg0), %edi 196 movl $pa(pg0), %edi
197 movl %edi, pa(init_pg_tables_start)
197 movl $pa(swapper_pg_pmd), %edx 198 movl $pa(swapper_pg_pmd), %edx
198 movl $PTE_ATTR, %eax 199 movl $PTE_ATTR, %eax
19910: 20010:
@@ -219,6 +220,8 @@ default_entry:
219 jb 10b 220 jb 10b
2201: 2211:
221 movl %edi,pa(init_pg_tables_end) 222 movl %edi,pa(init_pg_tables_end)
223 shrl $12, %eax
224 movl %eax, pa(max_pfn_mapped)
222 225
223 /* Do early initialization of the fixmap area */ 226 /* Do early initialization of the fixmap area */
224 movl $pa(swapper_pg_fixmap)+PDE_ATTR,%eax 227 movl $pa(swapper_pg_fixmap)+PDE_ATTR,%eax
@@ -228,6 +231,7 @@ default_entry:
228page_pde_offset = (__PAGE_OFFSET >> 20); 231page_pde_offset = (__PAGE_OFFSET >> 20);
229 232
230 movl $pa(pg0), %edi 233 movl $pa(pg0), %edi
234 movl %edi, pa(init_pg_tables_start)
231 movl $pa(swapper_pg_dir), %edx 235 movl $pa(swapper_pg_dir), %edx
232 movl $PTE_ATTR, %eax 236 movl $PTE_ATTR, %eax
23310: 23710:
@@ -249,6 +253,8 @@ page_pde_offset = (__PAGE_OFFSET >> 20);
249 cmpl %ebp,%eax 253 cmpl %ebp,%eax
250 jb 10b 254 jb 10b
251 movl %edi,pa(init_pg_tables_end) 255 movl %edi,pa(init_pg_tables_end)
256 shrl $12, %eax
257 movl %eax, pa(max_pfn_mapped)
252 258
253 /* Do early initialization of the fixmap area */ 259 /* Do early initialization of the fixmap area */
254 movl $pa(swapper_pg_fixmap)+PDE_ATTR,%eax 260 movl $pa(swapper_pg_fixmap)+PDE_ATTR,%eax
@@ -446,10 +452,13 @@ is386: movl $2,%ecx # set MP
446 je 1f 452 je 1f
447 movl $(__KERNEL_PERCPU), %eax 453 movl $(__KERNEL_PERCPU), %eax
448 movl %eax,%fs # set this cpu's percpu 454 movl %eax,%fs # set this cpu's percpu
449 jmp initialize_secondary # all other CPUs call initialize_secondary 455 movl (stack_start), %esp
4501: 4561:
451#endif /* CONFIG_SMP */ 457#endif /* CONFIG_SMP */
452 jmp i386_start_kernel 458 jmp *(initial_code)
459.align 4
460ENTRY(initial_code)
461 .long i386_start_kernel
453 462
454/* 463/*
455 * We depend on ET to be correct. This checks for 287/387. 464 * We depend on ET to be correct. This checks for 287/387.
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index b817974ef942..b07ac7b217cb 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -18,6 +18,7 @@
18#include <asm/page.h> 18#include <asm/page.h>
19#include <asm/msr.h> 19#include <asm/msr.h>
20#include <asm/cache.h> 20#include <asm/cache.h>
21#include <asm/processor-flags.h>
21 22
22#ifdef CONFIG_PARAVIRT 23#ifdef CONFIG_PARAVIRT
23#include <asm/asm-offsets.h> 24#include <asm/asm-offsets.h>
@@ -31,6 +32,13 @@
31 * 32 *
32 */ 33 */
33 34
35#define pud_index(x) (((x) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
36
37L4_PAGE_OFFSET = pgd_index(__PAGE_OFFSET)
38L3_PAGE_OFFSET = pud_index(__PAGE_OFFSET)
39L4_START_KERNEL = pgd_index(__START_KERNEL_map)
40L3_START_KERNEL = pud_index(__START_KERNEL_map)
41
34 .text 42 .text
35 .section .text.head 43 .section .text.head
36 .code64 44 .code64
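The new index symbols are ordinary 4-level page-table arithmetic; spelling them out shows they reproduce the old hard-coded slots (values for 4K pages, with __PAGE_OFFSET = 0xffff810000000000):

	/* pgd_index(v) = (v >> 39) & 511,  pud_index(v) = (v >> 30) & 511
	 *   L4_PAGE_OFFSET  = pgd_index(0xffff810000000000) == 258  (was 258*8)
	 *   L4_START_KERNEL = pgd_index(0xffffffff80000000) == 511  (was 511*8)
	 *   L3_START_KERNEL = pud_index(0xffffffff80000000) == 510  (was .fill 510) */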
@@ -76,8 +84,8 @@ startup_64:
76 /* Fixup the physical addresses in the page table 84 /* Fixup the physical addresses in the page table
77 */ 85 */
78 addq %rbp, init_level4_pgt + 0(%rip) 86 addq %rbp, init_level4_pgt + 0(%rip)
79 addq %rbp, init_level4_pgt + (258*8)(%rip) 87 addq %rbp, init_level4_pgt + (L4_PAGE_OFFSET*8)(%rip)
80 addq %rbp, init_level4_pgt + (511*8)(%rip) 88 addq %rbp, init_level4_pgt + (L4_START_KERNEL*8)(%rip)
81 89
82 addq %rbp, level3_ident_pgt + 0(%rip) 90 addq %rbp, level3_ident_pgt + 0(%rip)
83 91
@@ -154,9 +162,7 @@ ENTRY(secondary_startup_64)
154 */ 162 */
155 163
156 /* Enable PAE mode and PGE */ 164 /* Enable PAE mode and PGE */
157 xorq %rax, %rax 165 movl $(X86_CR4_PAE | X86_CR4_PGE), %eax
158 btsq $5, %rax
159 btsq $7, %rax
160 movq %rax, %cr4 166 movq %rax, %cr4
161 167
162 /* Setup early boot stage 4 level pagetables. */ 168 /* Setup early boot stage 4 level pagetables. */
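The named constants build the same value the deleted btsq pair produced:

	/* X86_CR4_PAE = 1 << 5 = 0x20, X86_CR4_PGE = 1 << 7 = 0x80,
	 * so %eax is loaded with 0xa0 either way. */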
@@ -184,19 +190,15 @@ ENTRY(secondary_startup_64)
1841: wrmsr /* Make changes effective */ 1901: wrmsr /* Make changes effective */
185 191
186 /* Setup cr0 */ 192 /* Setup cr0 */
187#define CR0_PM 1 /* protected mode */ 193#define CR0_STATE (X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | \
188#define CR0_MP (1<<1) 194 X86_CR0_NE | X86_CR0_WP | X86_CR0_AM | \
189#define CR0_ET (1<<4) 195 X86_CR0_PG)
190#define CR0_NE (1<<5) 196 movl $CR0_STATE, %eax
191#define CR0_WP (1<<16)
192#define CR0_AM (1<<18)
193#define CR0_PAGING (1<<31)
194 movl $CR0_PM|CR0_MP|CR0_ET|CR0_NE|CR0_WP|CR0_AM|CR0_PAGING,%eax
195 /* Make changes effective */ 197 /* Make changes effective */
196 movq %rax, %cr0 198 movq %rax, %cr0
197 199
198 /* Setup a boot time stack */ 200 /* Setup a boot time stack */
199 movq init_rsp(%rip),%rsp 201 movq stack_start(%rip),%rsp
200 202
201 /* zero EFLAGS after setting rsp */ 203 /* zero EFLAGS after setting rsp */
202 pushq $0 204 pushq $0
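CR0_STATE spelled out, matching the value the removed local #defines built up:

	/* PE(1<<0) | MP(1<<1) | ET(1<<4) | NE(1<<5) |
	 * WP(1<<16) | AM(1<<18) | PG(1<<31)  == 0x80050033 */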
@@ -208,7 +210,7 @@ ENTRY(secondary_startup_64)
208 * addresses where we're currently running on. We have to do that here 210 * addresses where we're currently running on. We have to do that here
209 * because in 32bit we couldn't load a 64bit linear address. 211 * because in 32bit we couldn't load a 64bit linear address.
210 */ 212 */
211 lgdt cpu_gdt_descr(%rip) 213 lgdt early_gdt_descr(%rip)
212 214
213 /* set up data segments. actually 0 would do too */ 215 /* set up data segments. actually 0 would do too */
214 movl $__KERNEL_DS,%eax 216 movl $__KERNEL_DS,%eax
@@ -257,8 +259,9 @@ ENTRY(secondary_startup_64)
257 .quad x86_64_start_kernel 259 .quad x86_64_start_kernel
258 __FINITDATA 260 __FINITDATA
259 261
260 ENTRY(init_rsp) 262 ENTRY(stack_start)
261 .quad init_thread_union+THREAD_SIZE-8 263 .quad init_thread_union+THREAD_SIZE-8
264 .word 0
262 265
263bad_address: 266bad_address:
264 jmp bad_address 267 jmp bad_address
@@ -327,11 +330,11 @@ early_idt_ripmsg:
327ENTRY(name) 330ENTRY(name)
328 331
329/* Automate the creation of 1 to 1 mapping pmd entries */ 332/* Automate the creation of 1 to 1 mapping pmd entries */
330#define PMDS(START, PERM, COUNT) \ 333#define PMDS(START, PERM, COUNT) \
331 i = 0 ; \ 334 i = 0 ; \
332 .rept (COUNT) ; \ 335 .rept (COUNT) ; \
333 .quad (START) + (i << 21) + (PERM) ; \ 336 .quad (START) + (i << PMD_SHIFT) + (PERM) ; \
334 i = i + 1 ; \ 337 i = i + 1 ; \
335 .endr 338 .endr
336 339
337 /* 340 /*
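A C rendering of what the PMDS() assembler macro emits, with PMD_SHIFT = 21 for 2M pages (a sketch of the expansion, not the macro itself):

	/* i, count, start, perm mirror the macro arguments */
	for (i = 0; i < count; i++)
		pmd[i] = start + (i << PMD_SHIFT) + perm;   /* one 2M entry each */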
@@ -342,9 +345,9 @@ ENTRY(name)
342 */ 345 */
343NEXT_PAGE(init_level4_pgt) 346NEXT_PAGE(init_level4_pgt)
344 .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE 347 .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
345 .fill 257,8,0 348 .org init_level4_pgt + L4_PAGE_OFFSET*8, 0
346 .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE 349 .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
347 .fill 252,8,0 350 .org init_level4_pgt + L4_START_KERNEL*8, 0
348 /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */ 351 /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
349 .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE 352 .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE
350 353
@@ -353,7 +356,7 @@ NEXT_PAGE(level3_ident_pgt)
353 .fill 511,8,0 356 .fill 511,8,0
354 357
355NEXT_PAGE(level3_kernel_pgt) 358NEXT_PAGE(level3_kernel_pgt)
356 .fill 510,8,0 359 .fill L3_START_KERNEL,8,0
357 /* (2^48-(2*1024*1024*1024)-((2^39)*511))/(2^30) = 510 */ 360 /* (2^48-(2*1024*1024*1024)-((2^39)*511))/(2^30) = 510 */
358 .quad level2_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE 361 .quad level2_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE
359 .quad level2_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE 362 .quad level2_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE
@@ -384,7 +387,7 @@ NEXT_PAGE(level2_kernel_pgt)
384 * If you want to increase this then increase MODULES_VADDR 387 * If you want to increase this then increase MODULES_VADDR
385 * too.) 388 * too.)
386 */ 389 */
387 PMDS(0, __PAGE_KERNEL_LARGE_EXEC|_PAGE_GLOBAL, 390 PMDS(0, __PAGE_KERNEL_LARGE_EXEC,
388 KERNEL_IMAGE_SIZE/PMD_SIZE) 391 KERNEL_IMAGE_SIZE/PMD_SIZE)
389 392
390NEXT_PAGE(level2_spare_pgt) 393NEXT_PAGE(level2_spare_pgt)
@@ -395,54 +398,16 @@ NEXT_PAGE(level2_spare_pgt)
395 398
396 .data 399 .data
397 .align 16 400 .align 16
398 .globl cpu_gdt_descr 401 .globl early_gdt_descr
399cpu_gdt_descr: 402early_gdt_descr:
400 .word gdt_end-cpu_gdt_table-1 403 .word GDT_ENTRIES*8-1
401gdt: 404 .quad per_cpu__gdt_page
402 .quad cpu_gdt_table
403#ifdef CONFIG_SMP
404 .rept NR_CPUS-1
405 .word 0
406 .quad 0
407 .endr
408#endif
409 405
410ENTRY(phys_base) 406ENTRY(phys_base)
411 /* This must match the first entry in level2_kernel_pgt */ 407 /* This must match the first entry in level2_kernel_pgt */
412 .quad 0x0000000000000000 408 .quad 0x0000000000000000
413 409
414/* We need valid kernel segments for data and code in long mode too
415 * IRET will check the segment types kkeil 2000/10/28
416 * Also sysret mandates a special GDT layout
417 */
418
419 .section .data.page_aligned, "aw"
420 .align PAGE_SIZE
421
422/* The TLS descriptors are currently at a different place compared to i386.
423 Hopefully nobody expects them at a fixed place (Wine?) */
424 410
425ENTRY(cpu_gdt_table)
426 .quad 0x0000000000000000 /* NULL descriptor */
427 .quad 0x00cf9b000000ffff /* __KERNEL32_CS */
428 .quad 0x00af9b000000ffff /* __KERNEL_CS */
429 .quad 0x00cf93000000ffff /* __KERNEL_DS */
430 .quad 0x00cffb000000ffff /* __USER32_CS */
431 .quad 0x00cff3000000ffff /* __USER_DS, __USER32_DS */
432 .quad 0x00affb000000ffff /* __USER_CS */
433 .quad 0x0 /* unused */
434 .quad 0,0 /* TSS */
435 .quad 0,0 /* LDT */
436 .quad 0,0,0 /* three TLS descriptors */
437 .quad 0x0000f40000000000 /* node/CPU stored in limit */
438gdt_end:
439 /* asm/segment.h:GDT_ENTRIES must match this */
440 /* This should be a multiple of the cache line size */
441 /* GDTs of other CPUs are now dynamically allocated */
442
443 /* zero the remaining page */
444 .fill PAGE_SIZE / 8 - GDT_ENTRIES,8,0
445
446 .section .bss, "aw", @nobits 411 .section .bss, "aw", @nobits
447 .align L1_CACHE_BYTES 412 .align L1_CACHE_BYTES
448ENTRY(idt_table) 413ENTRY(idt_table)
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 9b5cfcdfc426..ea230ec69057 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -17,7 +17,7 @@
17 17
18/* FSEC = 10^-15 18/* FSEC = 10^-15
19 NSEC = 10^-9 */ 19 NSEC = 10^-9 */
20#define FSEC_PER_NSEC 1000000 20#define FSEC_PER_NSEC 1000000L
21 21
22/* 22/*
23 * HPET address is set in acpi/boot.c, when an ACPI entry exists 23 * HPET address is set in acpi/boot.c, when an ACPI entry exists
@@ -206,20 +206,19 @@ static void hpet_enable_legacy_int(void)
206 206
207static void hpet_legacy_clockevent_register(void) 207static void hpet_legacy_clockevent_register(void)
208{ 208{
209 uint64_t hpet_freq;
210
211 /* Start HPET legacy interrupts */ 209 /* Start HPET legacy interrupts */
212 hpet_enable_legacy_int(); 210 hpet_enable_legacy_int();
213 211
214 /* 212 /*
215 * The period is a femto seconds value. We need to calculate the 213 * The mult factor is defined as (include/linux/clockchips.h)
216 * scaled math multiplication factor for nanosecond to hpet tick 214 * mult/2^shift = cyc/ns (in contrast to ns/cyc in clocksource.h)
217 * conversion. 215 * hpet_period is in units of femtoseconds (per cycle), so
216 * mult/2^shift = cyc/ns = 10^6/hpet_period
217 * mult = (10^6 * 2^shift)/hpet_period
218 * mult = (FSEC_PER_NSEC << hpet_clockevent.shift)/hpet_period
218 */ 219 */
219 hpet_freq = 1000000000000000ULL; 220 hpet_clockevent.mult = div_sc((unsigned long) FSEC_PER_NSEC,
220 do_div(hpet_freq, hpet_period); 221 hpet_period, hpet_clockevent.shift);
221 hpet_clockevent.mult = div_sc((unsigned long) hpet_freq,
222 NSEC_PER_SEC, hpet_clockevent.shift);
223 /* Calculate the min / max delta */ 222 /* Calculate the min / max delta */
224 hpet_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFFFFFF, 223 hpet_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFFFFFF,
225 &hpet_clockevent); 224 &hpet_clockevent);
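div_sc() computes (ticks << shift) / nsec in 64 bits, so the call above is exactly the formula in the comment. A worked example for a 14.31818 MHz HPET (hpet_period about 69841279 fs) with a shift of 32 (both values illustrative; the shift comes from hpet_clockevent):

	/* mult = (1000000ULL << 32) / 69841279 ~= 61496115   (cyc/ns * 2^32) */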
@@ -324,7 +323,7 @@ static struct clocksource clocksource_hpet = {
324 323
325static int hpet_clocksource_register(void) 324static int hpet_clocksource_register(void)
326{ 325{
327 u64 tmp, start, now; 326 u64 start, now;
328 cycle_t t1; 327 cycle_t t1;
329 328
330 /* Start the counter */ 329 /* Start the counter */
@@ -351,21 +350,15 @@ static int hpet_clocksource_register(void)
351 return -ENODEV; 350 return -ENODEV;
352 } 351 }
353 352
354 /* Initialize and register HPET clocksource 353 /*
355 * 354 * The definition of mult is (include/linux/clocksource.h)
356 * hpet period is in femto seconds per cycle 355 * mult/2^shift = ns/cyc and hpet_period is in units of fsec/cyc
357 * so we need to convert this to ns/cyc units 356 * so we first need to convert hpet_period to ns/cyc units:
358 * approximated by mult/2^shift 357 * mult/2^shift = ns/cyc = hpet_period/10^6
359 * 358 * mult = (hpet_period * 2^shift)/10^6
360 * fsec/cyc * 1nsec/1000000fsec = nsec/cyc = mult/2^shift 359 * mult = (hpet_period << shift)/FSEC_PER_NSEC
361 * fsec/cyc * 1ns/1000000fsec * 2^shift = mult
362 * fsec/cyc * 2^shift * 1nsec/1000000fsec = mult
363 * (fsec/cyc << shift)/1000000 = mult
364 * (hpet_period << shift)/FSEC_PER_NSEC = mult
365 */ 360 */
366 tmp = (u64)hpet_period << HPET_SHIFT; 361 clocksource_hpet.mult = div_sc(hpet_period, FSEC_PER_NSEC, HPET_SHIFT);
367 do_div(tmp, FSEC_PER_NSEC);
368 clocksource_hpet.mult = (u32)tmp;
369 362
370 clocksource_register(&clocksource_hpet); 363 clocksource_register(&clocksource_hpet);
371 364
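Same helper, opposite direction (ns/cyc). With HPET_SHIFT = 22 and the period above (again illustrative):

	/* mult = (69841279ULL << 22) / 1000000 ~= 292935555
	 * i.e. roughly 69.84 ns per HPET cycle, scaled by 2^22. */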
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index 95e80e5033c3..eb9ddd8efb82 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -162,7 +162,7 @@ int xfpregs_get(struct task_struct *target, const struct user_regset *regset,
162 int ret; 162 int ret;
163 163
164 if (!cpu_has_fxsr) 164 if (!cpu_has_fxsr)
165 return -EIO; 165 return -ENODEV;
166 166
167 ret = init_fpu(target); 167 ret = init_fpu(target);
168 if (ret) 168 if (ret)
@@ -179,7 +179,7 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset,
179 int ret; 179 int ret;
180 180
181 if (!cpu_has_fxsr) 181 if (!cpu_has_fxsr)
182 return -EIO; 182 return -ENODEV;
183 183
184 ret = init_fpu(target); 184 ret = init_fpu(target);
185 if (ret) 185 if (ret)
diff --git a/arch/x86/kernel/i8259_32.c b/arch/x86/kernel/i8259.c
index fe631967d625..dc92b49d9204 100644
--- a/arch/x86/kernel/i8259_32.c
+++ b/arch/x86/kernel/i8259.c
@@ -1,8 +1,10 @@
1#include <linux/linkage.h>
1#include <linux/errno.h> 2#include <linux/errno.h>
2#include <linux/signal.h> 3#include <linux/signal.h>
3#include <linux/sched.h> 4#include <linux/sched.h>
4#include <linux/ioport.h> 5#include <linux/ioport.h>
5#include <linux/interrupt.h> 6#include <linux/interrupt.h>
7#include <linux/timex.h>
6#include <linux/slab.h> 8#include <linux/slab.h>
7#include <linux/random.h> 9#include <linux/random.h>
8#include <linux/init.h> 10#include <linux/init.h>
@@ -10,10 +12,12 @@
10#include <linux/sysdev.h> 12#include <linux/sysdev.h>
11#include <linux/bitops.h> 13#include <linux/bitops.h>
12 14
15#include <asm/acpi.h>
13#include <asm/atomic.h> 16#include <asm/atomic.h>
14#include <asm/system.h> 17#include <asm/system.h>
15#include <asm/io.h> 18#include <asm/io.h>
16#include <asm/timer.h> 19#include <asm/timer.h>
20#include <asm/hw_irq.h>
17#include <asm/pgtable.h> 21#include <asm/pgtable.h>
18#include <asm/delay.h> 22#include <asm/delay.h>
19#include <asm/desc.h> 23#include <asm/desc.h>
@@ -32,7 +36,7 @@ static int i8259A_auto_eoi;
32DEFINE_SPINLOCK(i8259A_lock); 36DEFINE_SPINLOCK(i8259A_lock);
33static void mask_and_ack_8259A(unsigned int); 37static void mask_and_ack_8259A(unsigned int);
34 38
35static struct irq_chip i8259A_chip = { 39struct irq_chip i8259A_chip = {
36 .name = "XT-PIC", 40 .name = "XT-PIC",
37 .mask = disable_8259A_irq, 41 .mask = disable_8259A_irq,
38 .disable = disable_8259A_irq, 42 .disable = disable_8259A_irq,
@@ -125,14 +129,14 @@ static inline int i8259A_irq_real(unsigned int irq)
125 int irqmask = 1<<irq; 129 int irqmask = 1<<irq;
126 130
127 if (irq < 8) { 131 if (irq < 8) {
128 outb(0x0B,PIC_MASTER_CMD); /* ISR register */ 132 outb(0x0B, PIC_MASTER_CMD); /* ISR register */
129 value = inb(PIC_MASTER_CMD) & irqmask; 133 value = inb(PIC_MASTER_CMD) & irqmask;
130 outb(0x0A,PIC_MASTER_CMD); /* back to the IRR register */ 134 outb(0x0A, PIC_MASTER_CMD); /* back to the IRR register */
131 return value; 135 return value;
132 } 136 }
133 outb(0x0B,PIC_SLAVE_CMD); /* ISR register */ 137 outb(0x0B, PIC_SLAVE_CMD); /* ISR register */
134 value = inb(PIC_SLAVE_CMD) & (irqmask >> 8); 138 value = inb(PIC_SLAVE_CMD) & (irqmask >> 8);
135 outb(0x0A,PIC_SLAVE_CMD); /* back to the IRR register */ 139 outb(0x0A, PIC_SLAVE_CMD); /* back to the IRR register */
136 return value; 140 return value;
137} 141}
138 142
@@ -171,12 +175,14 @@ handle_real_irq:
171 if (irq & 8) { 175 if (irq & 8) {
172 inb(PIC_SLAVE_IMR); /* DUMMY - (do we need this?) */ 176 inb(PIC_SLAVE_IMR); /* DUMMY - (do we need this?) */
173 outb(cached_slave_mask, PIC_SLAVE_IMR); 177 outb(cached_slave_mask, PIC_SLAVE_IMR);
174 outb(0x60+(irq&7),PIC_SLAVE_CMD);/* 'Specific EOI' to slave */ 178 /* 'Specific EOI' to slave */
175 outb(0x60+PIC_CASCADE_IR,PIC_MASTER_CMD); /* 'Specific EOI' to master-IRQ2 */ 179 outb(0x60+(irq&7), PIC_SLAVE_CMD);
180 /* 'Specific EOI' to master-IRQ2 */
181 outb(0x60+PIC_CASCADE_IR, PIC_MASTER_CMD);
176 } else { 182 } else {
177 inb(PIC_MASTER_IMR); /* DUMMY - (do we need this?) */ 183 inb(PIC_MASTER_IMR); /* DUMMY - (do we need this?) */
178 outb(cached_master_mask, PIC_MASTER_IMR); 184 outb(cached_master_mask, PIC_MASTER_IMR);
179 outb(0x60+irq,PIC_MASTER_CMD); /* 'Specific EOI to master */ 185 outb(0x60+irq, PIC_MASTER_CMD); /* 'Specific EOI to master */
180 } 186 }
181 spin_unlock_irqrestore(&i8259A_lock, flags); 187 spin_unlock_irqrestore(&i8259A_lock, flags);
182 return; 188 return;
@@ -199,7 +205,8 @@ spurious_8259A_irq:
199 * lets ACK and report it. [once per IRQ] 205 * lets ACK and report it. [once per IRQ]
200 */ 206 */
201 if (!(spurious_irq_mask & irqmask)) { 207 if (!(spurious_irq_mask & irqmask)) {
202 printk(KERN_DEBUG "spurious 8259A interrupt: IRQ%d.\n", irq); 208 printk(KERN_DEBUG
209 "spurious 8259A interrupt: IRQ%d.\n", irq);
203 spurious_irq_mask |= irqmask; 210 spurious_irq_mask |= irqmask;
204 } 211 }
205 atomic_inc(&irq_err_count); 212 atomic_inc(&irq_err_count);
@@ -290,17 +297,28 @@ void init_8259A(int auto_eoi)
290 * outb_pic - this has to work on a wide range of PC hardware. 297 * outb_pic - this has to work on a wide range of PC hardware.
291 */ 298 */
292 outb_pic(0x11, PIC_MASTER_CMD); /* ICW1: select 8259A-1 init */ 299 outb_pic(0x11, PIC_MASTER_CMD); /* ICW1: select 8259A-1 init */
293 outb_pic(0x20 + 0, PIC_MASTER_IMR); /* ICW2: 8259A-1 IR0-7 mapped to 0x20-0x27 */ 300
294 outb_pic(1U << PIC_CASCADE_IR, PIC_MASTER_IMR); /* 8259A-1 (the master) has a slave on IR2 */ 301 /* ICW2: 8259A-1 IR0-7 mapped to 0x30-0x37 on x86-64,
302 to 0x20-0x27 on i386 */
303 outb_pic(IRQ0_VECTOR, PIC_MASTER_IMR);
304
305 /* 8259A-1 (the master) has a slave on IR2 */
306 outb_pic(1U << PIC_CASCADE_IR, PIC_MASTER_IMR);
307
295 if (auto_eoi) /* master does Auto EOI */ 308 if (auto_eoi) /* master does Auto EOI */
296 outb_pic(MASTER_ICW4_DEFAULT | PIC_ICW4_AEOI, PIC_MASTER_IMR); 309 outb_pic(MASTER_ICW4_DEFAULT | PIC_ICW4_AEOI, PIC_MASTER_IMR);
297 else /* master expects normal EOI */ 310 else /* master expects normal EOI */
298 outb_pic(MASTER_ICW4_DEFAULT, PIC_MASTER_IMR); 311 outb_pic(MASTER_ICW4_DEFAULT, PIC_MASTER_IMR);
299 312
300 outb_pic(0x11, PIC_SLAVE_CMD); /* ICW1: select 8259A-2 init */ 313 outb_pic(0x11, PIC_SLAVE_CMD); /* ICW1: select 8259A-2 init */
301 outb_pic(0x20 + 8, PIC_SLAVE_IMR); /* ICW2: 8259A-2 IR0-7 mapped to 0x28-0x2f */ 314
302 outb_pic(PIC_CASCADE_IR, PIC_SLAVE_IMR); /* 8259A-2 is a slave on master's IR2 */ 315 /* ICW2: 8259A-2 IR0-7 mapped to IRQ8_VECTOR */
303 outb_pic(SLAVE_ICW4_DEFAULT, PIC_SLAVE_IMR); /* (slave's support for AEOI in flat mode is to be investigated) */ 316 outb_pic(IRQ8_VECTOR, PIC_SLAVE_IMR);
317 /* 8259A-2 is a slave on master's IR2 */
318 outb_pic(PIC_CASCADE_IR, PIC_SLAVE_IMR);
319 /* (slave's support for AEOI in flat mode is to be investigated) */
320 outb_pic(SLAVE_ICW4_DEFAULT, PIC_SLAVE_IMR);
321
304 if (auto_eoi) 322 if (auto_eoi)
305 /* 323 /*
306 * In AEOI mode we just have to mask the interrupt 324 * In AEOI mode we just have to mask the interrupt
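For reference, the ICW handshake each outb_pic() sequence above implements, per the 8259A datasheet:

	/* ICW1 0x11 : edge-triggered, cascade mode, ICW4 follows
	 * ICW2      : vector base (IRQ0_VECTOR: 0x20 on i386, 0x30 on x86-64)
	 * ICW3      : master gets a bitmask of slave lines (1 << 2),
	 *             slave gets its cascade ID (2)
	 * ICW4      : 8086 mode, plus AEOI when requested */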
@@ -317,93 +335,3 @@ void init_8259A(int auto_eoi)
317 335
318 spin_unlock_irqrestore(&i8259A_lock, flags); 336 spin_unlock_irqrestore(&i8259A_lock, flags);
319} 337}
320
321/*
322 * Note that on a 486, we don't want to do a SIGFPE on an irq13
323 * as the irq is unreliable, and exception 16 works correctly
324 * (ie as explained in the intel literature). On a 386, you
325 * can't use exception 16 due to bad IBM design, so we have to
326 * rely on the less exact irq13.
327 *
 328 * Careful.. Not only is IRQ13 unreliable, but it also
329 * leads to races. IBM designers who came up with it should
330 * be shot.
331 */
332
333
334static irqreturn_t math_error_irq(int cpl, void *dev_id)
335{
336 extern void math_error(void __user *);
337 outb(0,0xF0);
338 if (ignore_fpu_irq || !boot_cpu_data.hard_math)
339 return IRQ_NONE;
340 math_error((void __user *)get_irq_regs()->ip);
341 return IRQ_HANDLED;
342}
343
344/*
345 * New motherboards sometimes make IRQ 13 be a PCI interrupt,
346 * so allow interrupt sharing.
347 */
348static struct irqaction fpu_irq = {
349 .handler = math_error_irq,
350 .mask = CPU_MASK_NONE,
351 .name = "fpu",
352};
353
354void __init init_ISA_irqs (void)
355{
356 int i;
357
358#ifdef CONFIG_X86_LOCAL_APIC
359 init_bsp_APIC();
360#endif
361 init_8259A(0);
362
363 /*
364 * 16 old-style INTA-cycle interrupts:
365 */
366 for (i = 0; i < 16; i++) {
367 set_irq_chip_and_handler_name(i, &i8259A_chip,
368 handle_level_irq, "XT");
369 }
370}
371
372/* Overridden in paravirt.c */
373void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ")));
374
375void __init native_init_IRQ(void)
376{
377 int i;
378
379 /* all the set up before the call gates are initialised */
380 pre_intr_init_hook();
381
382 /*
383 * Cover the whole vector space, no vector can escape
384 * us. (some of these will be overridden and become
385 * 'special' SMP interrupts)
386 */
387 for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) {
388 int vector = FIRST_EXTERNAL_VECTOR + i;
389 if (i >= NR_IRQS)
390 break;
391 /* SYSCALL_VECTOR was reserved in trap_init. */
392 if (!test_bit(vector, used_vectors))
393 set_intr_gate(vector, interrupt[i]);
394 }
395
396 /* setup after call gates are initialised (usually add in
397 * the architecture specific gates)
398 */
399 intr_init_hook();
400
401 /*
402 * External FPU? Set up irq13 if so, for
403 * original braindamaged IBM FERR coupling.
404 */
405 if (boot_cpu_data.hard_math && !cpu_has_fpu)
406 setup_irq(FPU_IRQ, &fpu_irq);
407
408 irq_ctx_init(smp_processor_id());
409}
diff --git a/arch/x86/kernel/i8259_64.c b/arch/x86/kernel/i8259_64.c
deleted file mode 100644
index fa57a1568508..000000000000
--- a/arch/x86/kernel/i8259_64.c
+++ /dev/null
@@ -1,512 +0,0 @@
1#include <linux/linkage.h>
2#include <linux/errno.h>
3#include <linux/signal.h>
4#include <linux/sched.h>
5#include <linux/ioport.h>
6#include <linux/interrupt.h>
7#include <linux/timex.h>
8#include <linux/slab.h>
9#include <linux/random.h>
10#include <linux/init.h>
11#include <linux/kernel_stat.h>
12#include <linux/sysdev.h>
13#include <linux/bitops.h>
14
15#include <asm/acpi.h>
16#include <asm/atomic.h>
17#include <asm/system.h>
18#include <asm/io.h>
19#include <asm/hw_irq.h>
20#include <asm/pgtable.h>
21#include <asm/delay.h>
22#include <asm/desc.h>
23#include <asm/apic.h>
24#include <asm/i8259.h>
25
26/*
27 * Common place to define all x86 IRQ vectors
28 *
29 * This builds up the IRQ handler stubs using some ugly macros in irq.h
30 *
31 * These macros create the low-level assembly IRQ routines that save
32 * register context and call do_IRQ(). do_IRQ() then does all the
33 * operations that are needed to keep the AT (or SMP IOAPIC)
34 * interrupt-controller happy.
35 */
36
37#define BI(x,y) \
38 BUILD_IRQ(x##y)
39
40#define BUILD_16_IRQS(x) \
41 BI(x,0) BI(x,1) BI(x,2) BI(x,3) \
42 BI(x,4) BI(x,5) BI(x,6) BI(x,7) \
43 BI(x,8) BI(x,9) BI(x,a) BI(x,b) \
44 BI(x,c) BI(x,d) BI(x,e) BI(x,f)
45
46/*
47 * ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts:
48 * (these are usually mapped to vectors 0x30-0x3f)
49 */
50
51/*
52 * The IO-APIC gives us many more interrupt sources. Most of these
53 * are unused but an SMP system is supposed to have enough memory ...
54 * sometimes (mostly wrt. hw bugs) we get corrupted vectors all
55 * across the spectrum, so we really want to be prepared to get all
56 * of these. Plus, more powerful systems might have more than 64
57 * IO-APIC registers.
58 *
59 * (these are usually mapped into the 0x30-0xff vector range)
60 */
61 BUILD_16_IRQS(0x2) BUILD_16_IRQS(0x3)
62BUILD_16_IRQS(0x4) BUILD_16_IRQS(0x5) BUILD_16_IRQS(0x6) BUILD_16_IRQS(0x7)
63BUILD_16_IRQS(0x8) BUILD_16_IRQS(0x9) BUILD_16_IRQS(0xa) BUILD_16_IRQS(0xb)
64BUILD_16_IRQS(0xc) BUILD_16_IRQS(0xd) BUILD_16_IRQS(0xe) BUILD_16_IRQS(0xf)
65
66#undef BUILD_16_IRQS
67#undef BI
68
69
70#define IRQ(x,y) \
71 IRQ##x##y##_interrupt
72
73#define IRQLIST_16(x) \
74 IRQ(x,0), IRQ(x,1), IRQ(x,2), IRQ(x,3), \
75 IRQ(x,4), IRQ(x,5), IRQ(x,6), IRQ(x,7), \
76 IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \
77 IRQ(x,c), IRQ(x,d), IRQ(x,e), IRQ(x,f)
78
79/* for the irq vectors */
80static void (*__initdata interrupt[NR_VECTORS - FIRST_EXTERNAL_VECTOR])(void) = {
81 IRQLIST_16(0x2), IRQLIST_16(0x3),
82 IRQLIST_16(0x4), IRQLIST_16(0x5), IRQLIST_16(0x6), IRQLIST_16(0x7),
83 IRQLIST_16(0x8), IRQLIST_16(0x9), IRQLIST_16(0xa), IRQLIST_16(0xb),
84 IRQLIST_16(0xc), IRQLIST_16(0xd), IRQLIST_16(0xe), IRQLIST_16(0xf)
85};
86
87#undef IRQ
88#undef IRQLIST_16
89
90/*
91 * This is the 'legacy' 8259A Programmable Interrupt Controller,
92 * present in the majority of PC/AT boxes.
93 * plus some generic x86 specific things if generic specifics makes
94 * any sense at all.
95 * this file should become arch/i386/kernel/irq.c when the old irq.c
96 * moves to arch independent land
97 */
98
99static int i8259A_auto_eoi;
100DEFINE_SPINLOCK(i8259A_lock);
101static void mask_and_ack_8259A(unsigned int);
102
103static struct irq_chip i8259A_chip = {
104 .name = "XT-PIC",
105 .mask = disable_8259A_irq,
106 .disable = disable_8259A_irq,
107 .unmask = enable_8259A_irq,
108 .mask_ack = mask_and_ack_8259A,
109};
110
111/*
112 * 8259A PIC functions to handle ISA devices:
113 */
114
115/*
116 * This contains the irq mask for both 8259A irq controllers,
117 */
118unsigned int cached_irq_mask = 0xffff;
119
120/*
121 * Not all IRQs can be routed through the IO-APIC, eg. on certain (older)
122 * boards the timer interrupt is not really connected to any IO-APIC pin,
123 * it's fed to the master 8259A's IR0 line only.
124 *
125 * Any '1' bit in this mask means the IRQ is routed through the IO-APIC.
126 * this 'mixed mode' IRQ handling costs nothing because it's only used
127 * at IRQ setup time.
128 */
129unsigned long io_apic_irqs;
130
131void disable_8259A_irq(unsigned int irq)
132{
133 unsigned int mask = 1 << irq;
134 unsigned long flags;
135
136 spin_lock_irqsave(&i8259A_lock, flags);
137 cached_irq_mask |= mask;
138 if (irq & 8)
139 outb(cached_slave_mask, PIC_SLAVE_IMR);
140 else
141 outb(cached_master_mask, PIC_MASTER_IMR);
142 spin_unlock_irqrestore(&i8259A_lock, flags);
143}
144
145void enable_8259A_irq(unsigned int irq)
146{
147 unsigned int mask = ~(1 << irq);
148 unsigned long flags;
149
150 spin_lock_irqsave(&i8259A_lock, flags);
151 cached_irq_mask &= mask;
152 if (irq & 8)
153 outb(cached_slave_mask, PIC_SLAVE_IMR);
154 else
155 outb(cached_master_mask, PIC_MASTER_IMR);
156 spin_unlock_irqrestore(&i8259A_lock, flags);
157}
158
159int i8259A_irq_pending(unsigned int irq)
160{
161 unsigned int mask = 1<<irq;
162 unsigned long flags;
163 int ret;
164
165 spin_lock_irqsave(&i8259A_lock, flags);
166 if (irq < 8)
167 ret = inb(PIC_MASTER_CMD) & mask;
168 else
169 ret = inb(PIC_SLAVE_CMD) & (mask >> 8);
170 spin_unlock_irqrestore(&i8259A_lock, flags);
171
172 return ret;
173}
174
175void make_8259A_irq(unsigned int irq)
176{
177 disable_irq_nosync(irq);
178 io_apic_irqs &= ~(1<<irq);
179 set_irq_chip_and_handler_name(irq, &i8259A_chip, handle_level_irq,
180 "XT");
181 enable_irq(irq);
182}
183
184/*
185 * This function assumes to be called rarely. Switching between
186 * 8259A registers is slow.
187 * This has to be protected by the irq controller spinlock
188 * before being called.
189 */
190static inline int i8259A_irq_real(unsigned int irq)
191{
192 int value;
193 int irqmask = 1<<irq;
194
195 if (irq < 8) {
196 outb(0x0B,PIC_MASTER_CMD); /* ISR register */
197 value = inb(PIC_MASTER_CMD) & irqmask;
198 outb(0x0A,PIC_MASTER_CMD); /* back to the IRR register */
199 return value;
200 }
201 outb(0x0B,PIC_SLAVE_CMD); /* ISR register */
202 value = inb(PIC_SLAVE_CMD) & (irqmask >> 8);
203 outb(0x0A,PIC_SLAVE_CMD); /* back to the IRR register */
204 return value;
205}
206
207/*
208 * Careful! The 8259A is a fragile beast, it pretty
209 * much _has_ to be done exactly like this (mask it
210 * first, _then_ send the EOI, and the order of EOI
211 * to the two 8259s is important!
212 */
213static void mask_and_ack_8259A(unsigned int irq)
214{
215 unsigned int irqmask = 1 << irq;
216 unsigned long flags;
217
218 spin_lock_irqsave(&i8259A_lock, flags);
219 /*
220 * Lightweight spurious IRQ detection. We do not want
221 * to overdo spurious IRQ handling - it's usually a sign
222 * of hardware problems, so we only do the checks we can
223 * do without slowing down good hardware unnecessarily.
224 *
225 * Note that IRQ7 and IRQ15 (the two spurious IRQs
226 * usually resulting from the 8259A-1|2 PICs) occur
227 * even if the IRQ is masked in the 8259A. Thus we
228 * can check spurious 8259A IRQs without doing the
229 * quite slow i8259A_irq_real() call for every IRQ.
230 * This does not cover 100% of spurious interrupts,
231 * but should be enough to warn the user that there
232 * is something bad going on ...
233 */
234 if (cached_irq_mask & irqmask)
235 goto spurious_8259A_irq;
236 cached_irq_mask |= irqmask;
237
238handle_real_irq:
239 if (irq & 8) {
240 inb(PIC_SLAVE_IMR); /* DUMMY - (do we need this?) */
241 outb(cached_slave_mask, PIC_SLAVE_IMR);
242 /* 'Specific EOI' to slave */
243 outb(0x60+(irq&7),PIC_SLAVE_CMD);
244 /* 'Specific EOI' to master-IRQ2 */
245 outb(0x60+PIC_CASCADE_IR,PIC_MASTER_CMD);
246 } else {
247 inb(PIC_MASTER_IMR); /* DUMMY - (do we need this?) */
248 outb(cached_master_mask, PIC_MASTER_IMR);
249 /* 'Specific EOI' to master */
250 outb(0x60+irq,PIC_MASTER_CMD);
251 }
252 spin_unlock_irqrestore(&i8259A_lock, flags);
253 return;
254
255spurious_8259A_irq:
256 /*
257 * this is the slow path - should happen rarely.
258 */
259 if (i8259A_irq_real(irq))
260 /*
261 * oops, the IRQ _is_ in service according to the
262 * 8259A - not spurious, go handle it.
263 */
264 goto handle_real_irq;
265
266 {
267 static int spurious_irq_mask;
268 /*
269 * At this point we can be sure the IRQ is spurious,
270 * lets ACK and report it. [once per IRQ]
271 */
272 if (!(spurious_irq_mask & irqmask)) {
273 printk(KERN_DEBUG
274 "spurious 8259A interrupt: IRQ%d.\n", irq);
275 spurious_irq_mask |= irqmask;
276 }
277 atomic_inc(&irq_err_count);
278 /*
279 * Theoretically we do not have to handle this IRQ,
280 * but in Linux this does not cause problems and is
281 * simpler for us.
282 */
283 goto handle_real_irq;
284 }
285}
286
287static char irq_trigger[2];
288/**
289 * ELCR registers (0x4d0, 0x4d1) control edge/level of IRQ
290 */
291static void restore_ELCR(char *trigger)
292{
293 outb(trigger[0], 0x4d0);
294 outb(trigger[1], 0x4d1);
295}
296
297static void save_ELCR(char *trigger)
298{
299 /* IRQ 0,1,2,8,13 are marked as reserved */
300 trigger[0] = inb(0x4d0) & 0xF8;
301 trigger[1] = inb(0x4d1) & 0xDE;
302}
303
304static int i8259A_resume(struct sys_device *dev)
305{
306 init_8259A(i8259A_auto_eoi);
307 restore_ELCR(irq_trigger);
308 return 0;
309}
310
311static int i8259A_suspend(struct sys_device *dev, pm_message_t state)
312{
313 save_ELCR(irq_trigger);
314 return 0;
315}
316
317static int i8259A_shutdown(struct sys_device *dev)
318{
319 /* Put the i8259A into a quiescent state that
320 * the kernel initialization code can get it
321 * out of.
322 */
323 outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */
 324 outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */
325 return 0;
326}
327
328static struct sysdev_class i8259_sysdev_class = {
329 .name = "i8259",
330 .suspend = i8259A_suspend,
331 .resume = i8259A_resume,
332 .shutdown = i8259A_shutdown,
333};
334
335static struct sys_device device_i8259A = {
336 .id = 0,
337 .cls = &i8259_sysdev_class,
338};
339
340static int __init i8259A_init_sysfs(void)
341{
342 int error = sysdev_class_register(&i8259_sysdev_class);
343 if (!error)
344 error = sysdev_register(&device_i8259A);
345 return error;
346}
347
348device_initcall(i8259A_init_sysfs);
349
350void init_8259A(int auto_eoi)
351{
352 unsigned long flags;
353
354 i8259A_auto_eoi = auto_eoi;
355
356 spin_lock_irqsave(&i8259A_lock, flags);
357
358 outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */
359 outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */
360
361 /*
362 * outb_pic - this has to work on a wide range of PC hardware.
363 */
364 outb_pic(0x11, PIC_MASTER_CMD); /* ICW1: select 8259A-1 init */
365 /* ICW2: 8259A-1 IR0-7 mapped to 0x30-0x37 */
366 outb_pic(IRQ0_VECTOR, PIC_MASTER_IMR);
367 /* 8259A-1 (the master) has a slave on IR2 */
368 outb_pic(0x04, PIC_MASTER_IMR);
369 if (auto_eoi) /* master does Auto EOI */
370 outb_pic(MASTER_ICW4_DEFAULT | PIC_ICW4_AEOI, PIC_MASTER_IMR);
371 else /* master expects normal EOI */
372 outb_pic(MASTER_ICW4_DEFAULT, PIC_MASTER_IMR);
373
374 outb_pic(0x11, PIC_SLAVE_CMD); /* ICW1: select 8259A-2 init */
375 /* ICW2: 8259A-2 IR0-7 mapped to 0x38-0x3f */
376 outb_pic(IRQ8_VECTOR, PIC_SLAVE_IMR);
377 /* 8259A-2 is a slave on master's IR2 */
378 outb_pic(PIC_CASCADE_IR, PIC_SLAVE_IMR);
379 /* (slave's support for AEOI in flat mode is to be investigated) */
380 outb_pic(SLAVE_ICW4_DEFAULT, PIC_SLAVE_IMR);
381
382 if (auto_eoi)
383 /*
384 * In AEOI mode we just have to mask the interrupt
385 * when acking.
386 */
387 i8259A_chip.mask_ack = disable_8259A_irq;
388 else
389 i8259A_chip.mask_ack = mask_and_ack_8259A;
390
391 udelay(100); /* wait for 8259A to initialize */
392
393 outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */
394 outb(cached_slave_mask, PIC_SLAVE_IMR); /* restore slave IRQ mask */
395
396 spin_unlock_irqrestore(&i8259A_lock, flags);
397}
398
399
400
401
402/*
403 * IRQ2 is cascade interrupt to second interrupt controller
404 */
405
406static struct irqaction irq2 = {
407 .handler = no_action,
408 .mask = CPU_MASK_NONE,
409 .name = "cascade",
410};
411DEFINE_PER_CPU(vector_irq_t, vector_irq) = {
412 [0 ... IRQ0_VECTOR - 1] = -1,
413 [IRQ0_VECTOR] = 0,
414 [IRQ1_VECTOR] = 1,
415 [IRQ2_VECTOR] = 2,
416 [IRQ3_VECTOR] = 3,
417 [IRQ4_VECTOR] = 4,
418 [IRQ5_VECTOR] = 5,
419 [IRQ6_VECTOR] = 6,
420 [IRQ7_VECTOR] = 7,
421 [IRQ8_VECTOR] = 8,
422 [IRQ9_VECTOR] = 9,
423 [IRQ10_VECTOR] = 10,
424 [IRQ11_VECTOR] = 11,
425 [IRQ12_VECTOR] = 12,
426 [IRQ13_VECTOR] = 13,
427 [IRQ14_VECTOR] = 14,
428 [IRQ15_VECTOR] = 15,
429 [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1
430};
431
432void __init init_ISA_irqs (void)
433{
434 int i;
435
436 init_bsp_APIC();
437 init_8259A(0);
438
439 for (i = 0; i < NR_IRQS; i++) {
440 irq_desc[i].status = IRQ_DISABLED;
441 irq_desc[i].action = NULL;
442 irq_desc[i].depth = 1;
443
444 if (i < 16) {
445 /*
446 * 16 old-style INTA-cycle interrupts:
447 */
448 set_irq_chip_and_handler_name(i, &i8259A_chip,
449 handle_level_irq, "XT");
450 } else {
451 /*
452 * 'high' PCI IRQs filled in on demand
453 */
454 irq_desc[i].chip = &no_irq_chip;
455 }
456 }
457}
458
459void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ")));
460
461void __init native_init_IRQ(void)
462{
463 int i;
464
465 init_ISA_irqs();
466 /*
467 * Cover the whole vector space, no vector can escape
468 * us. (some of these will be overridden and become
469 * 'special' SMP interrupts)
470 */
471 for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) {
472 int vector = FIRST_EXTERNAL_VECTOR + i;
473 if (vector != IA32_SYSCALL_VECTOR)
474 set_intr_gate(vector, interrupt[i]);
475 }
476
477#ifdef CONFIG_SMP
478 /*
479 * The reschedule interrupt is a CPU-to-CPU reschedule-helper
480 * IPI, driven by wakeup.
481 */
482 set_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
483
484 /* IPIs for invalidation */
485 set_intr_gate(INVALIDATE_TLB_VECTOR_START+0, invalidate_interrupt0);
486 set_intr_gate(INVALIDATE_TLB_VECTOR_START+1, invalidate_interrupt1);
487 set_intr_gate(INVALIDATE_TLB_VECTOR_START+2, invalidate_interrupt2);
488 set_intr_gate(INVALIDATE_TLB_VECTOR_START+3, invalidate_interrupt3);
489 set_intr_gate(INVALIDATE_TLB_VECTOR_START+4, invalidate_interrupt4);
490 set_intr_gate(INVALIDATE_TLB_VECTOR_START+5, invalidate_interrupt5);
491 set_intr_gate(INVALIDATE_TLB_VECTOR_START+6, invalidate_interrupt6);
492 set_intr_gate(INVALIDATE_TLB_VECTOR_START+7, invalidate_interrupt7);
493
494 /* IPI for generic function call */
495 set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
496
497 /* Low priority IPI to cleanup after moving an irq */
498 set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
499#endif
500 set_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
501 set_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt);
502
503 /* self generated IPI for local APIC timer */
504 set_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
505
506 /* IPI vectors for APIC spurious and error interrupts */
507 set_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
508 set_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
509
510 if (!acpi_ioapic)
511 setup_irq(2, &irq2);
512}
diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index 4dc8600d9d20..c50adb84ea6f 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -25,6 +25,7 @@
25#include <linux/init.h> 25#include <linux/init.h>
26#include <linux/delay.h> 26#include <linux/delay.h>
27#include <linux/sched.h> 27#include <linux/sched.h>
28#include <linux/bootmem.h>
28#include <linux/mc146818rtc.h> 29#include <linux/mc146818rtc.h>
29#include <linux/compiler.h> 30#include <linux/compiler.h>
30#include <linux/acpi.h> 31#include <linux/acpi.h>
@@ -58,7 +59,14 @@ static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
58static DEFINE_SPINLOCK(ioapic_lock); 59static DEFINE_SPINLOCK(ioapic_lock);
59static DEFINE_SPINLOCK(vector_lock); 60static DEFINE_SPINLOCK(vector_lock);
60 61
61int timer_over_8254 __initdata = 1; 62static bool mask_ioapic_irq_2 __initdata;
63
64void __init force_mask_ioapic_irq_2(void)
65{
66 mask_ioapic_irq_2 = true;
67}
68
69int timer_through_8259 __initdata;
62 70
63/* 71/*
64 * Is the SiS APIC rmw bug present ? 72 * Is the SiS APIC rmw bug present ?
@@ -72,15 +80,21 @@ int sis_apic_bug = -1;
72int nr_ioapic_registers[MAX_IO_APICS]; 80int nr_ioapic_registers[MAX_IO_APICS];
73 81
74/* I/O APIC entries */ 82/* I/O APIC entries */
75struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS]; 83struct mp_config_ioapic mp_ioapics[MAX_IO_APICS];
76int nr_ioapics; 84int nr_ioapics;
77 85
78/* MP IRQ source entries */ 86/* MP IRQ source entries */
79struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; 87struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
80 88
81/* # of MP IRQ source entries */ 89/* # of MP IRQ source entries */
82int mp_irq_entries; 90int mp_irq_entries;
83 91
92#if defined (CONFIG_MCA) || defined (CONFIG_EISA)
93int mp_bus_id_to_type[MAX_MP_BUSSES];
94#endif
95
96DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
97
84static int disable_timer_pin_1 __initdata; 98static int disable_timer_pin_1 __initdata;
85 99
86/* 100/*
@@ -110,7 +124,7 @@ struct io_apic {
110static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx) 124static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
111{ 125{
112 return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx) 126 return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx)
113 + (mp_ioapics[idx].mpc_apicaddr & ~PAGE_MASK); 127 + (mp_ioapics[idx].mp_apicaddr & ~PAGE_MASK);
114} 128}
115 129
116static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) 130static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
@@ -239,7 +253,7 @@ static void __init replace_pin_at_irq(unsigned int irq,
239 } 253 }
240} 254}
241 255
242static void __modify_IO_APIC_irq (unsigned int irq, unsigned long enable, unsigned long disable) 256static void __modify_IO_APIC_irq(unsigned int irq, unsigned long enable, unsigned long disable)
243{ 257{
244 struct irq_pin_list *entry = irq_2_pin + irq; 258 struct irq_pin_list *entry = irq_2_pin + irq;
245 unsigned int pin, reg; 259 unsigned int pin, reg;
@@ -259,30 +273,32 @@ static void __modify_IO_APIC_irq (unsigned int irq, unsigned long enable, unsign
259} 273}
260 274
261/* mask = 1 */ 275/* mask = 1 */
262static void __mask_IO_APIC_irq (unsigned int irq) 276static void __mask_IO_APIC_irq(unsigned int irq)
263{ 277{
264 __modify_IO_APIC_irq(irq, 0x00010000, 0); 278 __modify_IO_APIC_irq(irq, IO_APIC_REDIR_MASKED, 0);
265} 279}
266 280
267/* mask = 0 */ 281/* mask = 0 */
268static void __unmask_IO_APIC_irq (unsigned int irq) 282static void __unmask_IO_APIC_irq(unsigned int irq)
269{ 283{
270 __modify_IO_APIC_irq(irq, 0, 0x00010000); 284 __modify_IO_APIC_irq(irq, 0, IO_APIC_REDIR_MASKED);
271} 285}
272 286
273/* mask = 1, trigger = 0 */ 287/* mask = 1, trigger = 0 */
274static void __mask_and_edge_IO_APIC_irq (unsigned int irq) 288static void __mask_and_edge_IO_APIC_irq(unsigned int irq)
275{ 289{
276 __modify_IO_APIC_irq(irq, 0x00010000, 0x00008000); 290 __modify_IO_APIC_irq(irq, IO_APIC_REDIR_MASKED,
291 IO_APIC_REDIR_LEVEL_TRIGGER);
277} 292}
278 293
279/* mask = 0, trigger = 1 */ 294/* mask = 0, trigger = 1 */
280static void __unmask_and_level_IO_APIC_irq (unsigned int irq) 295static void __unmask_and_level_IO_APIC_irq(unsigned int irq)
281{ 296{
282 __modify_IO_APIC_irq(irq, 0x00008000, 0x00010000); 297 __modify_IO_APIC_irq(irq, IO_APIC_REDIR_LEVEL_TRIGGER,
298 IO_APIC_REDIR_MASKED);
283} 299}
284 300
285static void mask_IO_APIC_irq (unsigned int irq) 301static void mask_IO_APIC_irq(unsigned int irq)
286{ 302{
287 unsigned long flags; 303 unsigned long flags;
288 304
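All four helpers funnel into the same read-modify-write, now spelled with named redirection-entry bits instead of magic numbers:

	/* For each pin routed to irq:
	 *   reg  = io_apic_read(apic, 0x10 + pin * 2);
	 *   reg &= ~disable;  reg |= enable;
	 * IO_APIC_REDIR_MASKED is bit 16, IO_APIC_REDIR_LEVEL_TRIGGER bit 15. */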
@@ -291,7 +307,7 @@ static void mask_IO_APIC_irq (unsigned int irq)
291 spin_unlock_irqrestore(&ioapic_lock, flags); 307 spin_unlock_irqrestore(&ioapic_lock, flags);
292} 308}
293 309
294static void unmask_IO_APIC_irq (unsigned int irq) 310static void unmask_IO_APIC_irq(unsigned int irq)
295{ 311{
296 unsigned long flags; 312 unsigned long flags;
297 313
@@ -303,7 +319,7 @@ static void unmask_IO_APIC_irq (unsigned int irq)
303static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) 319static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
304{ 320{
305 struct IO_APIC_route_entry entry; 321 struct IO_APIC_route_entry entry;
306 322
307 /* Check delivery_mode to be sure we're not clearing an SMI pin */ 323 /* Check delivery_mode to be sure we're not clearing an SMI pin */
308 entry = ioapic_read_entry(apic, pin); 324 entry = ioapic_read_entry(apic, pin);
309 if (entry.delivery_mode == dest_SMI) 325 if (entry.delivery_mode == dest_SMI)
@@ -315,7 +331,7 @@ static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
315 ioapic_mask_entry(apic, pin); 331 ioapic_mask_entry(apic, pin);
316} 332}
317 333
318static void clear_IO_APIC (void) 334static void clear_IO_APIC(void)
319{ 335{
320 int apic, pin; 336 int apic, pin;
321 337
@@ -332,7 +348,7 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask)
332 struct irq_pin_list *entry = irq_2_pin + irq; 348 struct irq_pin_list *entry = irq_2_pin + irq;
333 unsigned int apicid_value; 349 unsigned int apicid_value;
334 cpumask_t tmp; 350 cpumask_t tmp;
335 351
336 cpus_and(tmp, cpumask, cpu_online_map); 352 cpus_and(tmp, cpumask, cpu_online_map);
337 if (cpus_empty(tmp)) 353 if (cpus_empty(tmp))
338 tmp = TARGET_CPUS; 354 tmp = TARGET_CPUS;
@@ -361,7 +377,7 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask)
361# include <linux/kernel_stat.h> /* kstat */ 377# include <linux/kernel_stat.h> /* kstat */
362# include <linux/slab.h> /* kmalloc() */ 378# include <linux/slab.h> /* kmalloc() */
363# include <linux/timer.h> 379# include <linux/timer.h>
364 380
365#define IRQBALANCE_CHECK_ARCH -999 381#define IRQBALANCE_CHECK_ARCH -999
366#define MAX_BALANCED_IRQ_INTERVAL (5*HZ) 382#define MAX_BALANCED_IRQ_INTERVAL (5*HZ)
367#define MIN_BALANCED_IRQ_INTERVAL (HZ/2) 383#define MIN_BALANCED_IRQ_INTERVAL (HZ/2)
@@ -373,14 +389,14 @@ static int physical_balance __read_mostly;
373static long balanced_irq_interval __read_mostly = MAX_BALANCED_IRQ_INTERVAL; 389static long balanced_irq_interval __read_mostly = MAX_BALANCED_IRQ_INTERVAL;
374 390
375static struct irq_cpu_info { 391static struct irq_cpu_info {
376 unsigned long * last_irq; 392 unsigned long *last_irq;
377 unsigned long * irq_delta; 393 unsigned long *irq_delta;
378 unsigned long irq; 394 unsigned long irq;
379} irq_cpu_data[NR_CPUS]; 395} irq_cpu_data[NR_CPUS];
380 396
381#define CPU_IRQ(cpu) (irq_cpu_data[cpu].irq) 397#define CPU_IRQ(cpu) (irq_cpu_data[cpu].irq)
382#define LAST_CPU_IRQ(cpu,irq) (irq_cpu_data[cpu].last_irq[irq]) 398#define LAST_CPU_IRQ(cpu, irq) (irq_cpu_data[cpu].last_irq[irq])
383#define IRQ_DELTA(cpu,irq) (irq_cpu_data[cpu].irq_delta[irq]) 399#define IRQ_DELTA(cpu, irq) (irq_cpu_data[cpu].irq_delta[irq])
384 400
385#define IDLE_ENOUGH(cpu,now) \ 401#define IDLE_ENOUGH(cpu,now) \
386 (idle_cpu(cpu) && ((now) - per_cpu(irq_stat, (cpu)).idle_timestamp > 1)) 402 (idle_cpu(cpu) && ((now) - per_cpu(irq_stat, (cpu)).idle_timestamp > 1))
@@ -419,8 +435,8 @@ inside:
419 if (cpu == -1) 435 if (cpu == -1)
420 cpu = NR_CPUS-1; 436 cpu = NR_CPUS-1;
421 } 437 }
422 } while (!cpu_online(cpu) || !IRQ_ALLOWED(cpu,allowed_mask) || 438 } while (!cpu_online(cpu) || !IRQ_ALLOWED(cpu, allowed_mask) ||
423 (search_idle && !IDLE_ENOUGH(cpu,now))); 439 (search_idle && !IDLE_ENOUGH(cpu, now)));
424 440
425 return cpu; 441 return cpu;
426} 442}
@@ -430,15 +446,14 @@ static inline void balance_irq(int cpu, int irq)
430 unsigned long now = jiffies; 446 unsigned long now = jiffies;
431 cpumask_t allowed_mask; 447 cpumask_t allowed_mask;
432 unsigned int new_cpu; 448 unsigned int new_cpu;
433 449
434 if (irqbalance_disabled) 450 if (irqbalance_disabled)
435 return; 451 return;
436 452
437 cpus_and(allowed_mask, cpu_online_map, balance_irq_affinity[irq]); 453 cpus_and(allowed_mask, cpu_online_map, balance_irq_affinity[irq]);
438 new_cpu = move(cpu, allowed_mask, now, 1); 454 new_cpu = move(cpu, allowed_mask, now, 1);
439 if (cpu != new_cpu) { 455 if (cpu != new_cpu)
440 set_pending_irq(irq, cpumask_of_cpu(new_cpu)); 456 set_pending_irq(irq, cpumask_of_cpu(new_cpu));
441 }
442} 457}
443 458
444static inline void rotate_irqs_among_cpus(unsigned long useful_load_threshold) 459static inline void rotate_irqs_among_cpus(unsigned long useful_load_threshold)
@@ -450,14 +465,14 @@ static inline void rotate_irqs_among_cpus(unsigned long useful_load_threshold)
450 if (!irq_desc[j].action) 465 if (!irq_desc[j].action)
451 continue; 466 continue;
452 /* Is it a significant load? */ 467 /* Is it a significant load? */
453 if (IRQ_DELTA(CPU_TO_PACKAGEINDEX(i),j) < 468 if (IRQ_DELTA(CPU_TO_PACKAGEINDEX(i), j) <
454 useful_load_threshold) 469 useful_load_threshold)
455 continue; 470 continue;
456 balance_irq(i, j); 471 balance_irq(i, j);
457 } 472 }
458 } 473 }
459 balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL, 474 balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL,
460 balanced_irq_interval - BALANCED_IRQ_LESS_DELTA); 475 balanced_irq_interval - BALANCED_IRQ_LESS_DELTA);
461 return; 476 return;
462} 477}
463 478
@@ -486,22 +501,22 @@ static void do_irq_balance(void)
486 /* Is this an active IRQ or balancing disabled? */ 501 /* Is this an active IRQ or balancing disabled? */
487 if (!irq_desc[j].action || irq_balancing_disabled(j)) 502 if (!irq_desc[j].action || irq_balancing_disabled(j))
488 continue; 503 continue;
489 if ( package_index == i ) 504 if (package_index == i)
490 IRQ_DELTA(package_index,j) = 0; 505 IRQ_DELTA(package_index, j) = 0;
491 /* Determine the total count per processor per IRQ */ 506 /* Determine the total count per processor per IRQ */
492 value_now = (unsigned long) kstat_cpu(i).irqs[j]; 507 value_now = (unsigned long) kstat_cpu(i).irqs[j];
493 508
494 /* Determine the activity per processor per IRQ */ 509 /* Determine the activity per processor per IRQ */
495 delta = value_now - LAST_CPU_IRQ(i,j); 510 delta = value_now - LAST_CPU_IRQ(i, j);
496 511
497 /* Update last_cpu_irq[][] for the next time */ 512 /* Update last_cpu_irq[][] for the next time */
498 LAST_CPU_IRQ(i,j) = value_now; 513 LAST_CPU_IRQ(i, j) = value_now;
499 514
500 /* Ignore IRQs whose rate is less than the clock */ 515 /* Ignore IRQs whose rate is less than the clock */
501 if (delta < useful_load_threshold) 516 if (delta < useful_load_threshold)
502 continue; 517 continue;
503 /* update the load for the processor or package total */ 518 /* update the load for the processor or package total */
504 IRQ_DELTA(package_index,j) += delta; 519 IRQ_DELTA(package_index, j) += delta;
505 520
506 /* Keep track of the higher numbered sibling as well */ 521 /* Keep track of the higher numbered sibling as well */
507 if (i != package_index) 522 if (i != package_index)
@@ -527,7 +542,8 @@ static void do_irq_balance(void)
527 max_cpu_irq = ULONG_MAX; 542 max_cpu_irq = ULONG_MAX;
528 543
529tryanothercpu: 544tryanothercpu:
530 /* Look for heaviest loaded processor. 545 /*
546 * Look for heaviest loaded processor.
531 * We may come back to get the next heaviest loaded processor. 547 * We may come back to get the next heaviest loaded processor.
532 * Skip processors with trivial loads. 548 * Skip processors with trivial loads.
533 */ 549 */
@@ -536,7 +552,7 @@ tryanothercpu:
536 for_each_online_cpu(i) { 552 for_each_online_cpu(i) {
537 if (i != CPU_TO_PACKAGEINDEX(i)) 553 if (i != CPU_TO_PACKAGEINDEX(i))
538 continue; 554 continue;
539 if (max_cpu_irq <= CPU_IRQ(i)) 555 if (max_cpu_irq <= CPU_IRQ(i))
540 continue; 556 continue;
541 if (tmp_cpu_irq < CPU_IRQ(i)) { 557 if (tmp_cpu_irq < CPU_IRQ(i)) {
542 tmp_cpu_irq = CPU_IRQ(i); 558 tmp_cpu_irq = CPU_IRQ(i);
@@ -545,8 +561,9 @@ tryanothercpu:
545 } 561 }
546 562
547 if (tmp_loaded == -1) { 563 if (tmp_loaded == -1) {
548 /* In the case of small number of heavy interrupt sources, 564 /*
549 * loading some of the cpus too much. We use Ingo's original 565 * In the case of small number of heavy interrupt sources,
566 * loading some of the cpus too much. We use Ingo's original
550 * approach to rotate them around. 567 * approach to rotate them around.
551 */ 568 */
552 if (!first_attempt && imbalance >= useful_load_threshold) { 569 if (!first_attempt && imbalance >= useful_load_threshold) {
@@ -555,13 +572,14 @@ tryanothercpu:
555 } 572 }
556 goto not_worth_the_effort; 573 goto not_worth_the_effort;
557 } 574 }
558 575
559 first_attempt = 0; /* heaviest search */ 576 first_attempt = 0; /* heaviest search */
560 max_cpu_irq = tmp_cpu_irq; /* load */ 577 max_cpu_irq = tmp_cpu_irq; /* load */
561 max_loaded = tmp_loaded; /* processor */ 578 max_loaded = tmp_loaded; /* processor */
562 imbalance = (max_cpu_irq - min_cpu_irq) / 2; 579 imbalance = (max_cpu_irq - min_cpu_irq) / 2;
563 580
564 /* if imbalance is less than approx 10% of max load, then 581 /*
582 * if imbalance is less than approx 10% of max load, then
565 * we are seeing diminishing returns - quit 583 * we are seeing diminishing returns - quit
566 */ 584 */
567 if (imbalance < (max_cpu_irq >> 3)) 585 if (imbalance < (max_cpu_irq >> 3))
@@ -577,26 +595,25 @@ tryanotherirq:
577 /* Is this an active IRQ? */ 595 /* Is this an active IRQ? */
578 if (!irq_desc[j].action) 596 if (!irq_desc[j].action)
579 continue; 597 continue;
580 if (imbalance <= IRQ_DELTA(max_loaded,j)) 598 if (imbalance <= IRQ_DELTA(max_loaded, j))
581 continue; 599 continue;
582 /* Try to find the IRQ that is closest to the imbalance 600 /* Try to find the IRQ that is closest to the imbalance
583 * without going over. 601 * without going over.
584 */ 602 */
585 if (move_this_load < IRQ_DELTA(max_loaded,j)) { 603 if (move_this_load < IRQ_DELTA(max_loaded, j)) {
586 move_this_load = IRQ_DELTA(max_loaded,j); 604 move_this_load = IRQ_DELTA(max_loaded, j);
587 selected_irq = j; 605 selected_irq = j;
588 } 606 }
589 } 607 }
590 if (selected_irq == -1) { 608 if (selected_irq == -1)
591 goto tryanothercpu; 609 goto tryanothercpu;
592 }
593 610
594 imbalance = move_this_load; 611 imbalance = move_this_load;
595 612
596 /* For physical_balance case, we accumulated both load 613 /* For physical_balance case, we accumulated both load
597 * values in one of the siblings' cpu_irq[], 614 * values in one of the siblings' cpu_irq[],
598 * to use the same code for physical and logical processors 615 * to use the same code for physical and logical processors
599 * as much as possible. 616 * as much as possible.
600 * 617 *
601 * NOTE: the cpu_irq[] array holds the sum of the load for 618 * NOTE: the cpu_irq[] array holds the sum of the load for
602 * sibling A and sibling B in the slot for the lowest numbered 619 * sibling A and sibling B in the slot for the lowest numbered
@@ -625,11 +642,11 @@ tryanotherirq:
625 /* mark for change destination */ 642 /* mark for change destination */
626 set_pending_irq(selected_irq, cpumask_of_cpu(min_loaded)); 643 set_pending_irq(selected_irq, cpumask_of_cpu(min_loaded));
627 644
628 /* Since we made a change, come back sooner to 645 /* Since we made a change, come back sooner to
629 * check for more variation. 646 * check for more variation.
630 */ 647 */
631 balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL, 648 balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL,
632 balanced_irq_interval - BALANCED_IRQ_LESS_DELTA); 649 balanced_irq_interval - BALANCED_IRQ_LESS_DELTA);
633 return; 650 return;
634 } 651 }
635 goto tryanotherirq; 652 goto tryanotherirq;
@@ -640,7 +657,7 @@ not_worth_the_effort:
640 * upward 657 * upward
641 */ 658 */
642 balanced_irq_interval = min((long)MAX_BALANCED_IRQ_INTERVAL, 659 balanced_irq_interval = min((long)MAX_BALANCED_IRQ_INTERVAL,
643 balanced_irq_interval + BALANCED_IRQ_MORE_DELTA); 660 balanced_irq_interval + BALANCED_IRQ_MORE_DELTA);
644 return; 661 return;
645} 662}
646 663
@@ -679,13 +696,13 @@ static int __init balanced_irq_init(void)
679 cpumask_t tmp; 696 cpumask_t tmp;
680 697
681 cpus_shift_right(tmp, cpu_online_map, 2); 698 cpus_shift_right(tmp, cpu_online_map, 2);
682 c = &boot_cpu_data; 699 c = &boot_cpu_data;
683 /* When not overwritten by the command line, ask the subarchitecture. */ 700 /* When not overwritten by the command line, ask the subarchitecture. */
684 if (irqbalance_disabled == IRQBALANCE_CHECK_ARCH) 701 if (irqbalance_disabled == IRQBALANCE_CHECK_ARCH)
685 irqbalance_disabled = NO_BALANCE_IRQ; 702 irqbalance_disabled = NO_BALANCE_IRQ;
686 if (irqbalance_disabled) 703 if (irqbalance_disabled)
687 return 0; 704 return 0;
688 705
689 /* disable irqbalance completely if there is only one processor online */ 706 /* disable irqbalance completely if there is only one processor online */
690 if (num_online_cpus() < 2) { 707 if (num_online_cpus() < 2) {
691 irqbalance_disabled = 1; 708 irqbalance_disabled = 1;
@@ -699,16 +716,14 @@ static int __init balanced_irq_init(void)
699 physical_balance = 1; 716 physical_balance = 1;
700 717
701 for_each_online_cpu(i) { 718 for_each_online_cpu(i) {
702 irq_cpu_data[i].irq_delta = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL); 719 irq_cpu_data[i].irq_delta = kzalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL);
703 irq_cpu_data[i].last_irq = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL); 720 irq_cpu_data[i].last_irq = kzalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL);
704 if (irq_cpu_data[i].irq_delta == NULL || irq_cpu_data[i].last_irq == NULL) { 721 if (irq_cpu_data[i].irq_delta == NULL || irq_cpu_data[i].last_irq == NULL) {
705 printk(KERN_ERR "balanced_irq_init: out of memory"); 722 printk(KERN_ERR "balanced_irq_init: out of memory");
706 goto failed; 723 goto failed;
707 } 724 }
708 memset(irq_cpu_data[i].irq_delta,0,sizeof(unsigned long) * NR_IRQS);
709 memset(irq_cpu_data[i].last_irq,0,sizeof(unsigned long) * NR_IRQS);
710 } 725 }
711 726
712 printk(KERN_INFO "Starting balanced_irq\n"); 727 printk(KERN_INFO "Starting balanced_irq\n");
713 if (!IS_ERR(kthread_run(balanced_irq, NULL, "kirqd"))) 728 if (!IS_ERR(kthread_run(balanced_irq, NULL, "kirqd")))
714 return 0; 729 return 0;
@@ -801,10 +816,10 @@ static int find_irq_entry(int apic, int pin, int type)
801 int i; 816 int i;
802 817
803 for (i = 0; i < mp_irq_entries; i++) 818 for (i = 0; i < mp_irq_entries; i++)
804 if (mp_irqs[i].mpc_irqtype == type && 819 if (mp_irqs[i].mp_irqtype == type &&
805 (mp_irqs[i].mpc_dstapic == mp_ioapics[apic].mpc_apicid || 820 (mp_irqs[i].mp_dstapic == mp_ioapics[apic].mp_apicid ||
806 mp_irqs[i].mpc_dstapic == MP_APIC_ALL) && 821 mp_irqs[i].mp_dstapic == MP_APIC_ALL) &&
807 mp_irqs[i].mpc_dstirq == pin) 822 mp_irqs[i].mp_dstirq == pin)
808 return i; 823 return i;
809 824
810 return -1; 825 return -1;
@@ -818,13 +833,13 @@ static int __init find_isa_irq_pin(int irq, int type)
818 int i; 833 int i;
819 834
820 for (i = 0; i < mp_irq_entries; i++) { 835 for (i = 0; i < mp_irq_entries; i++) {
821 int lbus = mp_irqs[i].mpc_srcbus; 836 int lbus = mp_irqs[i].mp_srcbus;
822 837
823 if (test_bit(lbus, mp_bus_not_pci) && 838 if (test_bit(lbus, mp_bus_not_pci) &&
824 (mp_irqs[i].mpc_irqtype == type) && 839 (mp_irqs[i].mp_irqtype == type) &&
825 (mp_irqs[i].mpc_srcbusirq == irq)) 840 (mp_irqs[i].mp_srcbusirq == irq))
826 841
827 return mp_irqs[i].mpc_dstirq; 842 return mp_irqs[i].mp_dstirq;
828 } 843 }
829 return -1; 844 return -1;
830} 845}
@@ -834,17 +849,17 @@ static int __init find_isa_irq_apic(int irq, int type)
834 int i; 849 int i;
835 850
836 for (i = 0; i < mp_irq_entries; i++) { 851 for (i = 0; i < mp_irq_entries; i++) {
837 int lbus = mp_irqs[i].mpc_srcbus; 852 int lbus = mp_irqs[i].mp_srcbus;
838 853
839 if (test_bit(lbus, mp_bus_not_pci) && 854 if (test_bit(lbus, mp_bus_not_pci) &&
840 (mp_irqs[i].mpc_irqtype == type) && 855 (mp_irqs[i].mp_irqtype == type) &&
841 (mp_irqs[i].mpc_srcbusirq == irq)) 856 (mp_irqs[i].mp_srcbusirq == irq))
842 break; 857 break;
843 } 858 }
844 if (i < mp_irq_entries) { 859 if (i < mp_irq_entries) {
845 int apic; 860 int apic;
846 for(apic = 0; apic < nr_ioapics; apic++) { 861 for (apic = 0; apic < nr_ioapics; apic++) {
847 if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic) 862 if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic)
848 return apic; 863 return apic;
849 } 864 }
850 } 865 }
@@ -864,28 +879,28 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
864 879
865 apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, " 880 apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, "
866 "slot:%d, pin:%d.\n", bus, slot, pin); 881 "slot:%d, pin:%d.\n", bus, slot, pin);
867 if (mp_bus_id_to_pci_bus[bus] == -1) { 882 if (test_bit(bus, mp_bus_not_pci)) {
868 printk(KERN_WARNING "PCI BIOS passed nonexistent PCI bus %d!\n", bus); 883 printk(KERN_WARNING "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
869 return -1; 884 return -1;
870 } 885 }
871 for (i = 0; i < mp_irq_entries; i++) { 886 for (i = 0; i < mp_irq_entries; i++) {
872 int lbus = mp_irqs[i].mpc_srcbus; 887 int lbus = mp_irqs[i].mp_srcbus;
873 888
874 for (apic = 0; apic < nr_ioapics; apic++) 889 for (apic = 0; apic < nr_ioapics; apic++)
875 if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic || 890 if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic ||
876 mp_irqs[i].mpc_dstapic == MP_APIC_ALL) 891 mp_irqs[i].mp_dstapic == MP_APIC_ALL)
877 break; 892 break;
878 893
879 if (!test_bit(lbus, mp_bus_not_pci) && 894 if (!test_bit(lbus, mp_bus_not_pci) &&
880 !mp_irqs[i].mpc_irqtype && 895 !mp_irqs[i].mp_irqtype &&
881 (bus == lbus) && 896 (bus == lbus) &&
882 (slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f))) { 897 (slot == ((mp_irqs[i].mp_srcbusirq >> 2) & 0x1f))) {
883 int irq = pin_2_irq(i,apic,mp_irqs[i].mpc_dstirq); 898 int irq = pin_2_irq(i, apic, mp_irqs[i].mp_dstirq);
884 899
885 if (!(apic || IO_APIC_IRQ(irq))) 900 if (!(apic || IO_APIC_IRQ(irq)))
886 continue; 901 continue;
887 902
888 if (pin == (mp_irqs[i].mpc_srcbusirq & 3)) 903 if (pin == (mp_irqs[i].mp_srcbusirq & 3))
889 return irq; 904 return irq;
890 /* 905 /*
891 * Use the first all-but-pin matching entry as a 906 * Use the first all-but-pin matching entry as a
@@ -900,7 +915,7 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
900EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector); 915EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
901 916
902/* 917/*
903 * This function is currently only a helper for the i386 SMP boot process, where 918 * This function is currently only a helper for the i386 SMP boot process, where
904 * we need to reprogram the ioredtbls to cater for the CPUs which have come online, 919 * we need to reprogram the ioredtbls to cater for the CPUs which have come online,
905 * so the mask in all cases should simply be TARGET_CPUS 920 * so the mask in all cases should simply be TARGET_CPUS
906 */ 921 */
@@ -952,7 +967,7 @@ static int EISA_ELCR(unsigned int irq)
952 * EISA conforming in the MP table, that means its trigger type must 967 * EISA conforming in the MP table, that means its trigger type must
953 * be read in from the ELCR */ 968 * be read in from the ELCR */
954 969
955#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mpc_srcbusirq)) 970#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mp_srcbusirq))
956#define default_EISA_polarity(idx) default_ISA_polarity(idx) 971#define default_EISA_polarity(idx) default_ISA_polarity(idx)
957 972
958/* PCI interrupts are always polarity one level triggered, 973/* PCI interrupts are always polarity one level triggered,
@@ -969,118 +984,115 @@ static int EISA_ELCR(unsigned int irq)
969 984
970static int MPBIOS_polarity(int idx) 985static int MPBIOS_polarity(int idx)
971{ 986{
972 int bus = mp_irqs[idx].mpc_srcbus; 987 int bus = mp_irqs[idx].mp_srcbus;
973 int polarity; 988 int polarity;
974 989
975 /* 990 /*
976 * Determine IRQ line polarity (high active or low active): 991 * Determine IRQ line polarity (high active or low active):
977 */ 992 */
978 switch (mp_irqs[idx].mpc_irqflag & 3) 993 switch (mp_irqs[idx].mp_irqflag & 3) {
994 case 0: /* conforms, i.e. bus-type dependent polarity */
979 { 995 {
980 case 0: /* conforms, ie. bus-type dependent polarity */ 996 polarity = test_bit(bus, mp_bus_not_pci)?
981 { 997 default_ISA_polarity(idx):
982 polarity = test_bit(bus, mp_bus_not_pci)? 998 default_PCI_polarity(idx);
983 default_ISA_polarity(idx): 999 break;
984 default_PCI_polarity(idx); 1000 }
985 break; 1001 case 1: /* high active */
986 } 1002 {
987 case 1: /* high active */ 1003 polarity = 0;
988 { 1004 break;
989 polarity = 0; 1005 }
990 break; 1006 case 2: /* reserved */
991 } 1007 {
992 case 2: /* reserved */ 1008 printk(KERN_WARNING "broken BIOS!!\n");
993 { 1009 polarity = 1;
994 printk(KERN_WARNING "broken BIOS!!\n"); 1010 break;
995 polarity = 1; 1011 }
996 break; 1012 case 3: /* low active */
997 } 1013 {
998 case 3: /* low active */ 1014 polarity = 1;
999 { 1015 break;
1000 polarity = 1; 1016 }
1001 break; 1017 default: /* invalid */
1002 } 1018 {
1003 default: /* invalid */ 1019 printk(KERN_WARNING "broken BIOS!!\n");
1004 { 1020 polarity = 1;
1005 printk(KERN_WARNING "broken BIOS!!\n"); 1021 break;
1006 polarity = 1; 1022 }
1007 break;
1008 }
1009 } 1023 }
1010 return polarity; 1024 return polarity;
1011} 1025}
1012 1026
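
[Editor's aside] MPBIOS_polarity() above and MPBIOS_trigger() below key off the same mp_irqflag field: bits 0-1 carry the polarity, bits 2-3 the trigger mode, with 0 meaning "conforms to the bus" and 2 reserved. A minimal decoding sketch (userspace C; the bit layout follows the two switch statements in this file, the string tables are illustrative only):

#include <stdio.h>

static const char *pol[]  = { "conforms", "active high", "reserved",
			      "active low" };
static const char *trig[] = { "conforms", "edge", "reserved", "level" };

static void decode_mp_irqflag(unsigned flag)
{
	printf("polarity: %s, trigger: %s\n",
	       pol[flag & 3], trig[(flag >> 2) & 3]);
}

int main(void)
{
	decode_mp_irqflag(0x0f);	/* active low, level: typical PCI */
	decode_mp_irqflag(0x00);	/* both conform to the bus default */
	return 0;
}
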
1013static int MPBIOS_trigger(int idx) 1027static int MPBIOS_trigger(int idx)
1014{ 1028{
1015 int bus = mp_irqs[idx].mpc_srcbus; 1029 int bus = mp_irqs[idx].mp_srcbus;
1016 int trigger; 1030 int trigger;
1017 1031
1018 /* 1032 /*
1019 * Determine IRQ trigger mode (edge or level sensitive): 1033 * Determine IRQ trigger mode (edge or level sensitive):
1020 */ 1034 */
1021 switch ((mp_irqs[idx].mpc_irqflag>>2) & 3) 1035 switch ((mp_irqs[idx].mp_irqflag>>2) & 3) {
1036 case 0: /* conforms, i.e. bus-type dependent */
1022 { 1037 {
1023 case 0: /* conforms, ie. bus-type dependent */ 1038 trigger = test_bit(bus, mp_bus_not_pci)?
1024 { 1039 default_ISA_trigger(idx):
1025 trigger = test_bit(bus, mp_bus_not_pci)? 1040 default_PCI_trigger(idx);
1026 default_ISA_trigger(idx):
1027 default_PCI_trigger(idx);
1028#if defined(CONFIG_EISA) || defined(CONFIG_MCA) 1041#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
1029 switch (mp_bus_id_to_type[bus]) 1042 switch (mp_bus_id_to_type[bus]) {
1030 { 1043 case MP_BUS_ISA: /* ISA pin */
1031 case MP_BUS_ISA: /* ISA pin */ 1044 {
1032 { 1045 /* set before the switch */
1033 /* set before the switch */
1034 break;
1035 }
1036 case MP_BUS_EISA: /* EISA pin */
1037 {
1038 trigger = default_EISA_trigger(idx);
1039 break;
1040 }
1041 case MP_BUS_PCI: /* PCI pin */
1042 {
1043 /* set before the switch */
1044 break;
1045 }
1046 case MP_BUS_MCA: /* MCA pin */
1047 {
1048 trigger = default_MCA_trigger(idx);
1049 break;
1050 }
1051 default:
1052 {
1053 printk(KERN_WARNING "broken BIOS!!\n");
1054 trigger = 1;
1055 break;
1056 }
1057 }
1058#endif
1059 break; 1046 break;
1060 } 1047 }
1061 case 1: /* edge */ 1048 case MP_BUS_EISA: /* EISA pin */
1062 { 1049 {
1063 trigger = 0; 1050 trigger = default_EISA_trigger(idx);
1064 break; 1051 break;
1065 } 1052 }
1066 case 2: /* reserved */ 1053 case MP_BUS_PCI: /* PCI pin */
1067 { 1054 {
1068 printk(KERN_WARNING "broken BIOS!!\n"); 1055 /* set before the switch */
1069 trigger = 1;
1070 break; 1056 break;
1071 } 1057 }
1072 case 3: /* level */ 1058 case MP_BUS_MCA: /* MCA pin */
1073 { 1059 {
1074 trigger = 1; 1060 trigger = default_MCA_trigger(idx);
1075 break; 1061 break;
1076 } 1062 }
1077 default: /* invalid */ 1063 default:
1078 { 1064 {
1079 printk(KERN_WARNING "broken BIOS!!\n"); 1065 printk(KERN_WARNING "broken BIOS!!\n");
1080 trigger = 0; 1066 trigger = 1;
1081 break; 1067 break;
1082 } 1068 }
1083 } 1069 }
1070#endif
1071 break;
1072 }
1073 case 1: /* edge */
1074 {
1075 trigger = 0;
1076 break;
1077 }
1078 case 2: /* reserved */
1079 {
1080 printk(KERN_WARNING "broken BIOS!!\n");
1081 trigger = 1;
1082 break;
1083 }
1084 case 3: /* level */
1085 {
1086 trigger = 1;
1087 break;
1088 }
1089 default: /* invalid */
1090 {
1091 printk(KERN_WARNING "broken BIOS!!\n");
1092 trigger = 0;
1093 break;
1094 }
1095 }
1084 return trigger; 1096 return trigger;
1085} 1097}
1086 1098
@@ -1097,16 +1109,16 @@ static inline int irq_trigger(int idx)
1097static int pin_2_irq(int idx, int apic, int pin) 1109static int pin_2_irq(int idx, int apic, int pin)
1098{ 1110{
1099 int irq, i; 1111 int irq, i;
1100 int bus = mp_irqs[idx].mpc_srcbus; 1112 int bus = mp_irqs[idx].mp_srcbus;
1101 1113
1102 /* 1114 /*
1103 * Debugging check, we are in big trouble if this message pops up! 1115 * Debugging check, we are in big trouble if this message pops up!
1104 */ 1116 */
1105 if (mp_irqs[idx].mpc_dstirq != pin) 1117 if (mp_irqs[idx].mp_dstirq != pin)
1106 printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n"); 1118 printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
1107 1119
1108 if (test_bit(bus, mp_bus_not_pci)) 1120 if (test_bit(bus, mp_bus_not_pci))
1109 irq = mp_irqs[idx].mpc_srcbusirq; 1121 irq = mp_irqs[idx].mp_srcbusirq;
1110 else { 1122 else {
1111 /* 1123 /*
1112 * PCI IRQs are mapped in order 1124 * PCI IRQs are mapped in order
@@ -1148,8 +1160,8 @@ static inline int IO_APIC_irq_trigger(int irq)
1148 1160
1149 for (apic = 0; apic < nr_ioapics; apic++) { 1161 for (apic = 0; apic < nr_ioapics; apic++) {
1150 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { 1162 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
1151 idx = find_irq_entry(apic,pin,mp_INT); 1163 idx = find_irq_entry(apic, pin, mp_INT);
1152 if ((idx != -1) && (irq == pin_2_irq(idx,apic,pin))) 1164 if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin)))
1153 return irq_trigger(idx); 1165 return irq_trigger(idx);
1154 } 1166 }
1155 } 1167 }
@@ -1164,7 +1176,7 @@ static u8 irq_vector[NR_IRQ_VECTORS] __read_mostly = { FIRST_DEVICE_VECTOR , 0 }
1164 1176
1165static int __assign_irq_vector(int irq) 1177static int __assign_irq_vector(int irq)
1166{ 1178{
1167 static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0; 1179 static int current_vector = FIRST_DEVICE_VECTOR, current_offset;
1168 int vector, offset; 1180 int vector, offset;
1169 1181
1170 BUG_ON((unsigned)irq >= NR_IRQ_VECTORS); 1182 BUG_ON((unsigned)irq >= NR_IRQ_VECTORS);
@@ -1176,7 +1188,7 @@ static int __assign_irq_vector(int irq)
1176 offset = current_offset; 1188 offset = current_offset;
1177next: 1189next:
1178 vector += 8; 1190 vector += 8;
1179 if (vector >= FIRST_SYSTEM_VECTOR) { 1191 if (vector >= first_system_vector) {
1180 offset = (offset + 1) % 8; 1192 offset = (offset + 1) % 8;
1181 vector = FIRST_DEVICE_VECTOR + offset; 1193 vector = FIRST_DEVICE_VECTOR + offset;
1182 } 1194 }
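
[Editor's aside] __assign_irq_vector() walks vectors in steps of 8 so consecutive IRQs land in different IDT priority classes, rotating the starting offset each time it runs into the system-vector range (now the variable first_system_vector rather than the old FIRST_SYSTEM_VECTOR constant). A sketch of just that walk (userspace C; the two numeric bounds are assumed values for illustration):

#include <stdio.h>

#define FIRST_DEVICE_VECTOR 0x31	/* assumed, for illustration */
#define SYSTEM_VECTOR_BOUND 0xef	/* stands in for first_system_vector */

int main(void)
{
	int vector = FIRST_DEVICE_VECTOR, offset = 0, i;

	for (i = 0; i < 32; i++) {
		vector += 8;
		if (vector >= SYSTEM_VECTOR_BOUND) {
			offset = (offset + 1) % 8;
			vector = FIRST_DEVICE_VECTOR + offset;
		}
		printf("%#x%c", vector, (i % 8 == 7) ? '\n' : ' ');
	}
	return 0;
}
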
@@ -1203,6 +1215,11 @@ static int assign_irq_vector(int irq)
1203 1215
1204 return vector; 1216 return vector;
1205} 1217}
1218
1219void setup_vector_irq(int cpu)
1220{
1221}
1222
1206static struct irq_chip ioapic_chip; 1223static struct irq_chip ioapic_chip;
1207 1224
1208#define IOAPIC_AUTO -1 1225#define IOAPIC_AUTO -1
@@ -1237,25 +1254,25 @@ static void __init setup_IO_APIC_irqs(void)
1237 /* 1254 /*
1238 * add it to the IO-APIC irq-routing table: 1255 * add it to the IO-APIC irq-routing table:
1239 */ 1256 */
1240 memset(&entry,0,sizeof(entry)); 1257 memset(&entry, 0, sizeof(entry));
1241 1258
1242 entry.delivery_mode = INT_DELIVERY_MODE; 1259 entry.delivery_mode = INT_DELIVERY_MODE;
1243 entry.dest_mode = INT_DEST_MODE; 1260 entry.dest_mode = INT_DEST_MODE;
1244 entry.mask = 0; /* enable IRQ */ 1261 entry.mask = 0; /* enable IRQ */
1245 entry.dest.logical.logical_dest = 1262 entry.dest.logical.logical_dest =
1246 cpu_mask_to_apicid(TARGET_CPUS); 1263 cpu_mask_to_apicid(TARGET_CPUS);
1247 1264
1248 idx = find_irq_entry(apic,pin,mp_INT); 1265 idx = find_irq_entry(apic, pin, mp_INT);
1249 if (idx == -1) { 1266 if (idx == -1) {
1250 if (first_notcon) { 1267 if (first_notcon) {
1251 apic_printk(APIC_VERBOSE, KERN_DEBUG 1268 apic_printk(APIC_VERBOSE, KERN_DEBUG
1252 " IO-APIC (apicid-pin) %d-%d", 1269 " IO-APIC (apicid-pin) %d-%d",
1253 mp_ioapics[apic].mpc_apicid, 1270 mp_ioapics[apic].mp_apicid,
1254 pin); 1271 pin);
1255 first_notcon = 0; 1272 first_notcon = 0;
1256 } else 1273 } else
1257 apic_printk(APIC_VERBOSE, ", %d-%d", 1274 apic_printk(APIC_VERBOSE, ", %d-%d",
1258 mp_ioapics[apic].mpc_apicid, pin); 1275 mp_ioapics[apic].mp_apicid, pin);
1259 continue; 1276 continue;
1260 } 1277 }
1261 1278
@@ -1289,7 +1306,7 @@ static void __init setup_IO_APIC_irqs(void)
1289 vector = assign_irq_vector(irq); 1306 vector = assign_irq_vector(irq);
1290 entry.vector = vector; 1307 entry.vector = vector;
1291 ioapic_register_intr(irq, vector, IOAPIC_AUTO); 1308 ioapic_register_intr(irq, vector, IOAPIC_AUTO);
1292 1309
1293 if (!apic && (irq < 16)) 1310 if (!apic && (irq < 16))
1294 disable_8259A_irq(irq); 1311 disable_8259A_irq(irq);
1295 } 1312 }
@@ -1302,25 +1319,21 @@ static void __init setup_IO_APIC_irqs(void)
1302} 1319}
1303 1320
1304/* 1321/*
1305 * Set up the 8259A-master output pin: 1322 * Set up the timer pin, possibly with the 8259A-master behind.
1306 */ 1323 */
1307static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, int vector) 1324static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin,
1325 int vector)
1308{ 1326{
1309 struct IO_APIC_route_entry entry; 1327 struct IO_APIC_route_entry entry;
1310 1328
1311 memset(&entry,0,sizeof(entry)); 1329 memset(&entry, 0, sizeof(entry));
1312
1313 disable_8259A_irq(0);
1314
1315 /* mask LVT0 */
1316 apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
1317 1330
1318 /* 1331 /*
1319 * We use logical delivery to get the timer IRQ 1332 * We use logical delivery to get the timer IRQ
1320 * to the first CPU. 1333 * to the first CPU.
1321 */ 1334 */
1322 entry.dest_mode = INT_DEST_MODE; 1335 entry.dest_mode = INT_DEST_MODE;
1323 entry.mask = 0; /* unmask IRQ now */ 1336 entry.mask = 1; /* mask IRQ now */
1324 entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS); 1337 entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
1325 entry.delivery_mode = INT_DELIVERY_MODE; 1338 entry.delivery_mode = INT_DELIVERY_MODE;
1326 entry.polarity = 0; 1339 entry.polarity = 0;
@@ -1329,17 +1342,14 @@ static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, in
1329 1342
1330 /* 1343 /*
1331 * The timer IRQ doesn't have to know that behind the 1344 * The timer IRQ doesn't have to know that behind the
1332 * scene we have an 8259A-master in AEOI mode ... 1345 * scene we may have an 8259A-master in AEOI mode ...
1333 */ 1346 */
1334 irq_desc[0].chip = &ioapic_chip; 1347 ioapic_register_intr(0, vector, IOAPIC_EDGE);
1335 set_irq_handler(0, handle_edge_irq);
1336 1348
1337 /* 1349 /*
1338 * Add it to the IO-APIC irq-routing table: 1350 * Add it to the IO-APIC irq-routing table:
1339 */ 1351 */
1340 ioapic_write_entry(apic, pin, entry); 1352 ioapic_write_entry(apic, pin, entry);
1341
1342 enable_8259A_irq(0);
1343} 1353}
1344 1354
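
[Editor's aside] Two behavioural changes hide in the rename above: the helper no longer touches the 8259A or LVT0 itself, and the entry is now written with mask = 1, so the pin stays off until check_timer() decides how to use it. A stand-in for the entry being composed (plain C; the struct is a simplified substitute for the kernel's IO_APIC_route_entry bitfields, and the vector/destination values in main() are assumed):

#include <stdio.h>
#include <string.h>

struct route_entry_sketch {
	unsigned vector;
	unsigned delivery_mode;	/* 0 = fixed, 1 = lowest priority, ... */
	unsigned dest_mode;	/* 0 = physical, 1 = logical */
	unsigned polarity;	/* 0 = active high */
	unsigned trigger;	/* 0 = edge */
	unsigned mask;		/* 1 = pin masked */
	unsigned logical_dest;
};

static struct route_entry_sketch timer_pin_entry(unsigned vector,
						 unsigned dest)
{
	struct route_entry_sketch e;

	memset(&e, 0, sizeof(e));
	e.dest_mode = 1;	/* logical delivery to reach CPU 0 */
	e.mask = 1;		/* start masked: the new behaviour */
	e.logical_dest = dest;
	e.trigger = 0;		/* edge, like the real timer entry */
	e.vector = vector;
	return e;
}

int main(void)
{
	struct route_entry_sketch e = timer_pin_entry(0x31, 0x01);

	printf("vector=%#x mask=%u dest_mode=%u\n",
	       e.vector, e.mask, e.dest_mode);
	return 0;
}
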
1345void __init print_IO_APIC(void) 1355void __init print_IO_APIC(void)
@@ -1354,10 +1364,10 @@ void __init print_IO_APIC(void)
1354 if (apic_verbosity == APIC_QUIET) 1364 if (apic_verbosity == APIC_QUIET)
1355 return; 1365 return;
1356 1366
1357 printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); 1367 printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
1358 for (i = 0; i < nr_ioapics; i++) 1368 for (i = 0; i < nr_ioapics; i++)
1359 printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", 1369 printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
1360 mp_ioapics[i].mpc_apicid, nr_ioapic_registers[i]); 1370 mp_ioapics[i].mp_apicid, nr_ioapic_registers[i]);
1361 1371
1362 /* 1372 /*
1363 * We are a bit conservative about what we expect. We have to 1373 * We are a bit conservative about what we expect. We have to
@@ -1376,7 +1386,7 @@ void __init print_IO_APIC(void)
1376 reg_03.raw = io_apic_read(apic, 3); 1386 reg_03.raw = io_apic_read(apic, 3);
1377 spin_unlock_irqrestore(&ioapic_lock, flags); 1387 spin_unlock_irqrestore(&ioapic_lock, flags);
1378 1388
1379 printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid); 1389 printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid);
1380 printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); 1390 printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
1381 printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); 1391 printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID);
1382 printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type); 1392 printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type);
@@ -1459,7 +1469,7 @@ void __init print_IO_APIC(void)
1459 1469
1460#if 0 1470#if 0
1461 1471
1462static void print_APIC_bitfield (int base) 1472static void print_APIC_bitfield(int base)
1463{ 1473{
1464 unsigned int v; 1474 unsigned int v;
1465 int i, j; 1475 int i, j;
@@ -1480,7 +1490,7 @@ static void print_APIC_bitfield (int base)
1480 } 1490 }
1481} 1491}
1482 1492
1483void /*__init*/ print_local_APIC(void * dummy) 1493void /*__init*/ print_local_APIC(void *dummy)
1484{ 1494{
1485 unsigned int v, ver, maxlvt; 1495 unsigned int v, ver, maxlvt;
1486 1496
@@ -1489,6 +1499,7 @@ void /*__init*/ print_local_APIC(void * dummy)
1489 1499
1490 printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n", 1500 printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
1491 smp_processor_id(), hard_smp_processor_id()); 1501 smp_processor_id(), hard_smp_processor_id());
1502 v = apic_read(APIC_ID);
1492 printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, 1503 printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v,
1493 GET_APIC_ID(read_apic_id())); 1504 GET_APIC_ID(read_apic_id()));
1494 v = apic_read(APIC_LVR); 1505 v = apic_read(APIC_LVR);
@@ -1563,7 +1574,7 @@ void /*__init*/ print_local_APIC(void * dummy)
1563 printk("\n"); 1574 printk("\n");
1564} 1575}
1565 1576
1566void print_all_local_APICs (void) 1577void print_all_local_APICs(void)
1567{ 1578{
1568 on_each_cpu(print_local_APIC, NULL, 1, 1); 1579 on_each_cpu(print_local_APIC, NULL, 1, 1);
1569} 1580}
@@ -1586,11 +1597,11 @@ void /*__init*/ print_PIC(void)
1586 v = inb(0xa0) << 8 | inb(0x20); 1597 v = inb(0xa0) << 8 | inb(0x20);
1587 printk(KERN_DEBUG "... PIC IRR: %04x\n", v); 1598 printk(KERN_DEBUG "... PIC IRR: %04x\n", v);
1588 1599
1589 outb(0x0b,0xa0); 1600 outb(0x0b, 0xa0);
1590 outb(0x0b,0x20); 1601 outb(0x0b, 0x20);
1591 v = inb(0xa0) << 8 | inb(0x20); 1602 v = inb(0xa0) << 8 | inb(0x20);
1592 outb(0x0a,0xa0); 1603 outb(0x0a, 0xa0);
1593 outb(0x0a,0x20); 1604 outb(0x0a, 0x20);
1594 1605
1595 spin_unlock_irqrestore(&i8259A_lock, flags); 1606 spin_unlock_irqrestore(&i8259A_lock, flags);
1596 1607
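
[Editor's aside] The 0x0a/0x0b writes in print_PIC() are OCW3 commands: the 8259A answers reads with the IRR by default, 0x0b switches the read-back to the ISR, and 0x0a switches it back. A hypothetical userspace probe of the same registers (a sketch using the Linux/x86 ioperm()/inb()/outb() interface, needs root; ports 0x20 and 0xa0 are the master and slave PICs):

#include <stdio.h>
#include <sys/io.h>

int main(void)
{
	unsigned int irr, isr;

	if (ioperm(0x20, 1, 1) || ioperm(0xa0, 1, 1))
		return 1;

	irr = inb(0xa0) << 8 | inb(0x20);	/* IRR is the default read */

	outb(0x0b, 0xa0);			/* OCW3: select ISR */
	outb(0x0b, 0x20);
	isr = inb(0xa0) << 8 | inb(0x20);

	outb(0x0a, 0xa0);			/* OCW3: back to IRR */
	outb(0x0a, 0x20);

	printf("IRR %04x  ISR %04x\n", irr, isr);
	return 0;
}
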
@@ -1626,7 +1637,7 @@ static void __init enable_IO_APIC(void)
1626 spin_unlock_irqrestore(&ioapic_lock, flags); 1637 spin_unlock_irqrestore(&ioapic_lock, flags);
1627 nr_ioapic_registers[apic] = reg_01.bits.entries+1; 1638 nr_ioapic_registers[apic] = reg_01.bits.entries+1;
1628 } 1639 }
1629 for(apic = 0; apic < nr_ioapics; apic++) { 1640 for (apic = 0; apic < nr_ioapics; apic++) {
1630 int pin; 1641 int pin;
1631 /* See if any of the pins is in ExtINT mode */ 1642 /* See if any of the pins is in ExtINT mode */
1632 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { 1643 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
@@ -1716,7 +1727,6 @@ void disable_IO_APIC(void)
1716 * by Matt Domsch <Matt_Domsch@dell.com> Tue Dec 21 12:25:05 CST 1999 1727 * by Matt Domsch <Matt_Domsch@dell.com> Tue Dec 21 12:25:05 CST 1999
1717 */ 1728 */
1718 1729
1719#ifndef CONFIG_X86_NUMAQ
1720static void __init setup_ioapic_ids_from_mpc(void) 1730static void __init setup_ioapic_ids_from_mpc(void)
1721{ 1731{
1722 union IO_APIC_reg_00 reg_00; 1732 union IO_APIC_reg_00 reg_00;
@@ -1726,6 +1736,11 @@ static void __init setup_ioapic_ids_from_mpc(void)
1726 unsigned char old_id; 1736 unsigned char old_id;
1727 unsigned long flags; 1737 unsigned long flags;
1728 1738
1739#ifdef CONFIG_X86_NUMAQ
1740 if (found_numaq)
1741 return;
1742#endif
1743
1729 /* 1744 /*
1730 * Don't check I/O APIC IDs for xAPIC systems. They have 1745 * Don't check I/O APIC IDs for xAPIC systems. They have
1731 * no meaning without the serial APIC bus. 1746 * no meaning without the serial APIC bus.
@@ -1748,15 +1763,15 @@ static void __init setup_ioapic_ids_from_mpc(void)
1748 spin_lock_irqsave(&ioapic_lock, flags); 1763 spin_lock_irqsave(&ioapic_lock, flags);
1749 reg_00.raw = io_apic_read(apic, 0); 1764 reg_00.raw = io_apic_read(apic, 0);
1750 spin_unlock_irqrestore(&ioapic_lock, flags); 1765 spin_unlock_irqrestore(&ioapic_lock, flags);
1751
1752 old_id = mp_ioapics[apic].mpc_apicid;
1753 1766
1754 if (mp_ioapics[apic].mpc_apicid >= get_physical_broadcast()) { 1767 old_id = mp_ioapics[apic].mp_apicid;
1768
1769 if (mp_ioapics[apic].mp_apicid >= get_physical_broadcast()) {
1755 printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n", 1770 printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n",
1756 apic, mp_ioapics[apic].mpc_apicid); 1771 apic, mp_ioapics[apic].mp_apicid);
1757 printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", 1772 printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
1758 reg_00.bits.ID); 1773 reg_00.bits.ID);
1759 mp_ioapics[apic].mpc_apicid = reg_00.bits.ID; 1774 mp_ioapics[apic].mp_apicid = reg_00.bits.ID;
1760 } 1775 }
1761 1776
1762 /* 1777 /*
@@ -1765,9 +1780,9 @@ static void __init setup_ioapic_ids_from_mpc(void)
1765 * 'stuck on smp_invalidate_needed IPI wait' messages. 1780 * 'stuck on smp_invalidate_needed IPI wait' messages.
1766 */ 1781 */
1767 if (check_apicid_used(phys_id_present_map, 1782 if (check_apicid_used(phys_id_present_map,
1768 mp_ioapics[apic].mpc_apicid)) { 1783 mp_ioapics[apic].mp_apicid)) {
1769 printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n", 1784 printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n",
1770 apic, mp_ioapics[apic].mpc_apicid); 1785 apic, mp_ioapics[apic].mp_apicid);
1771 for (i = 0; i < get_physical_broadcast(); i++) 1786 for (i = 0; i < get_physical_broadcast(); i++)
1772 if (!physid_isset(i, phys_id_present_map)) 1787 if (!physid_isset(i, phys_id_present_map))
1773 break; 1788 break;
@@ -1776,13 +1791,13 @@ static void __init setup_ioapic_ids_from_mpc(void)
1776 printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", 1791 printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
1777 i); 1792 i);
1778 physid_set(i, phys_id_present_map); 1793 physid_set(i, phys_id_present_map);
1779 mp_ioapics[apic].mpc_apicid = i; 1794 mp_ioapics[apic].mp_apicid = i;
1780 } else { 1795 } else {
1781 physid_mask_t tmp; 1796 physid_mask_t tmp;
1782 tmp = apicid_to_cpu_present(mp_ioapics[apic].mpc_apicid); 1797 tmp = apicid_to_cpu_present(mp_ioapics[apic].mp_apicid);
1783 apic_printk(APIC_VERBOSE, "Setting %d in the " 1798 apic_printk(APIC_VERBOSE, "Setting %d in the "
1784 "phys_id_present_map\n", 1799 "phys_id_present_map\n",
1785 mp_ioapics[apic].mpc_apicid); 1800 mp_ioapics[apic].mp_apicid);
1786 physids_or(phys_id_present_map, phys_id_present_map, tmp); 1801 physids_or(phys_id_present_map, phys_id_present_map, tmp);
1787 } 1802 }
1788 1803
@@ -1791,21 +1806,21 @@ static void __init setup_ioapic_ids_from_mpc(void)
1791 * We need to adjust the IRQ routing table 1806 * We need to adjust the IRQ routing table
1792 * if the ID changed. 1807 * if the ID changed.
1793 */ 1808 */
1794 if (old_id != mp_ioapics[apic].mpc_apicid) 1809 if (old_id != mp_ioapics[apic].mp_apicid)
1795 for (i = 0; i < mp_irq_entries; i++) 1810 for (i = 0; i < mp_irq_entries; i++)
1796 if (mp_irqs[i].mpc_dstapic == old_id) 1811 if (mp_irqs[i].mp_dstapic == old_id)
1797 mp_irqs[i].mpc_dstapic 1812 mp_irqs[i].mp_dstapic
1798 = mp_ioapics[apic].mpc_apicid; 1813 = mp_ioapics[apic].mp_apicid;
1799 1814
1800 /* 1815 /*
1801 * Read the right value from the MPC table and 1816 * Read the right value from the MPC table and
1802 * write it into the ID register. 1817 * write it into the ID register.
1803 */ 1818 */
1804 apic_printk(APIC_VERBOSE, KERN_INFO 1819 apic_printk(APIC_VERBOSE, KERN_INFO
1805 "...changing IO-APIC physical APIC ID to %d ...", 1820 "...changing IO-APIC physical APIC ID to %d ...",
1806 mp_ioapics[apic].mpc_apicid); 1821 mp_ioapics[apic].mp_apicid);
1807 1822
1808 reg_00.bits.ID = mp_ioapics[apic].mpc_apicid; 1823 reg_00.bits.ID = mp_ioapics[apic].mp_apicid;
1809 spin_lock_irqsave(&ioapic_lock, flags); 1824 spin_lock_irqsave(&ioapic_lock, flags);
1810 io_apic_write(apic, 0, reg_00.raw); 1825 io_apic_write(apic, 0, reg_00.raw);
1811 spin_unlock_irqrestore(&ioapic_lock, flags); 1826 spin_unlock_irqrestore(&ioapic_lock, flags);
@@ -1816,15 +1831,12 @@ static void __init setup_ioapic_ids_from_mpc(void)
1816 spin_lock_irqsave(&ioapic_lock, flags); 1831 spin_lock_irqsave(&ioapic_lock, flags);
1817 reg_00.raw = io_apic_read(apic, 0); 1832 reg_00.raw = io_apic_read(apic, 0);
1818 spin_unlock_irqrestore(&ioapic_lock, flags); 1833 spin_unlock_irqrestore(&ioapic_lock, flags);
1819 if (reg_00.bits.ID != mp_ioapics[apic].mpc_apicid) 1834 if (reg_00.bits.ID != mp_ioapics[apic].mp_apicid)
1820 printk("could not set ID!\n"); 1835 printk("could not set ID!\n");
1821 else 1836 else
1822 apic_printk(APIC_VERBOSE, " ok.\n"); 1837 apic_printk(APIC_VERBOSE, " ok.\n");
1823 } 1838 }
1824} 1839}
1825#else
1826static void __init setup_ioapic_ids_from_mpc(void) { }
1827#endif
1828 1840
1829int no_timer_check __initdata; 1841int no_timer_check __initdata;
1830 1842
@@ -2015,12 +2027,12 @@ static inline void init_IO_APIC_traps(void)
2015 * The local APIC irq-chip implementation: 2027 * The local APIC irq-chip implementation:
2016 */ 2028 */
2017 2029
2018static void ack_apic(unsigned int irq) 2030static void ack_lapic_irq(unsigned int irq)
2019{ 2031{
2020 ack_APIC_irq(); 2032 ack_APIC_irq();
2021} 2033}
2022 2034
2023static void mask_lapic_irq (unsigned int irq) 2035static void mask_lapic_irq(unsigned int irq)
2024{ 2036{
2025 unsigned long v; 2037 unsigned long v;
2026 2038
@@ -2028,7 +2040,7 @@ static void mask_lapic_irq (unsigned int irq)
2028 apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED); 2040 apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED);
2029} 2041}
2030 2042
2031static void unmask_lapic_irq (unsigned int irq) 2043static void unmask_lapic_irq(unsigned int irq)
2032{ 2044{
2033 unsigned long v; 2045 unsigned long v;
2034 2046
@@ -2037,23 +2049,31 @@ static void unmask_lapic_irq (unsigned int irq)
2037} 2049}
2038 2050
2039static struct irq_chip lapic_chip __read_mostly = { 2051static struct irq_chip lapic_chip __read_mostly = {
2040 .name = "local-APIC-edge", 2052 .name = "local-APIC",
2041 .mask = mask_lapic_irq, 2053 .mask = mask_lapic_irq,
2042 .unmask = unmask_lapic_irq, 2054 .unmask = unmask_lapic_irq,
2043 .eoi = ack_apic, 2055 .ack = ack_lapic_irq,
2044}; 2056};
2045 2057
2058static void lapic_register_intr(int irq, int vector)
2059{
2060 irq_desc[irq].status &= ~IRQ_LEVEL;
2061 set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
2062 "edge");
2063 set_intr_gate(vector, interrupt[irq]);
2064}
2065
2046static void __init setup_nmi(void) 2066static void __init setup_nmi(void)
2047{ 2067{
2048 /* 2068 /*
2049 * Dirty trick to enable the NMI watchdog ... 2069 * Dirty trick to enable the NMI watchdog ...
2050 * We put the 8259A master into AEOI mode and 2070 * We put the 8259A master into AEOI mode and
2051 * unmask on all local APICs LVT0 as NMI. 2071 * unmask on all local APICs LVT0 as NMI.
2052 * 2072 *
2053 * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire') 2073 * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire')
2054 * is from Maciej W. Rozycki - so we do not have to EOI from 2074 * is from Maciej W. Rozycki - so we do not have to EOI from
2055 * the NMI handler or the timer interrupt. 2075 * the NMI handler or the timer interrupt.
2056 */ 2076 */
2057 apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ..."); 2077 apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ...");
2058 2078
2059 enable_NMI_through_LVT0(); 2079 enable_NMI_through_LVT0();
@@ -2129,11 +2149,16 @@ static inline void __init unlock_ExtINT_logic(void)
2129static inline void __init check_timer(void) 2149static inline void __init check_timer(void)
2130{ 2150{
2131 int apic1, pin1, apic2, pin2; 2151 int apic1, pin1, apic2, pin2;
2152 int no_pin1 = 0;
2132 int vector; 2153 int vector;
2154 unsigned int ver;
2133 unsigned long flags; 2155 unsigned long flags;
2134 2156
2135 local_irq_save(flags); 2157 local_irq_save(flags);
2136 2158
2159 ver = apic_read(APIC_LVR);
2160 ver = GET_APIC_VERSION(ver);
2161
2137 /* 2162 /*
2138 * get/set the timer IRQ vector: 2163 * get/set the timer IRQ vector:
2139 */ 2164 */
@@ -2142,17 +2167,17 @@ static inline void __init check_timer(void)
2142 set_intr_gate(vector, interrupt[0]); 2167 set_intr_gate(vector, interrupt[0]);
2143 2168
2144 /* 2169 /*
2145 * Subtle, code in do_timer_interrupt() expects an AEOI 2170 * As IRQ0 is to be enabled in the 8259A, the virtual
2146 * mode for the 8259A whenever interrupts are routed 2171 * wire has to be disabled in the local APIC. Also
2147 * through I/O APICs. Also IRQ0 has to be enabled in 2172 * timer interrupts need to be acknowledged manually in
2148 * the 8259A which implies the virtual wire has to be 2173 * the 8259A for the i82489DX when using the NMI
2149 * disabled in the local APIC. 2174 * watchdog as that APIC treats NMIs as level-triggered.
2175 * The AEOI mode will finish them in the 8259A
2176 * automatically.
2150 */ 2177 */
2151 apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); 2178 apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
2152 init_8259A(1); 2179 init_8259A(1);
2153 timer_ack = 1; 2180 timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver));
2154 if (timer_over_8254 > 0)
2155 enable_8259A_irq(0);
2156 2181
2157 pin1 = find_isa_irq_pin(0, mp_INT); 2182 pin1 = find_isa_irq_pin(0, mp_INT);
2158 apic1 = find_isa_irq_apic(0, mp_INT); 2183 apic1 = find_isa_irq_apic(0, mp_INT);
@@ -2162,14 +2187,36 @@ static inline void __init check_timer(void)
2162 printk(KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n", 2187 printk(KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n",
2163 vector, apic1, pin1, apic2, pin2); 2188 vector, apic1, pin1, apic2, pin2);
2164 2189
2190 if (mask_ioapic_irq_2)
2191 mask_IO_APIC_irq(2);
2192
2193 /*
2194 * Some BIOS writers are clueless and report the ExtINTA
2195 * I/O APIC input from the cascaded 8259A as the timer
2196 * interrupt input. So just in case, if only one pin
2197 * was found above, try it both directly and through the
2198 * 8259A.
2199 */
2200 if (pin1 == -1) {
2201 pin1 = pin2;
2202 apic1 = apic2;
2203 no_pin1 = 1;
2204 } else if (pin2 == -1) {
2205 pin2 = pin1;
2206 apic2 = apic1;
2207 }
2208
2165 if (pin1 != -1) { 2209 if (pin1 != -1) {
2166 /* 2210 /*
2167 * Ok, does IRQ0 through the IOAPIC work? 2211 * Ok, does IRQ0 through the IOAPIC work?
2168 */ 2212 */
2213 if (no_pin1) {
2214 add_pin_to_irq(0, apic1, pin1);
2215 setup_timer_IRQ0_pin(apic1, pin1, vector);
2216 }
2169 unmask_IO_APIC_irq(0); 2217 unmask_IO_APIC_irq(0);
2170 if (timer_irq_works()) { 2218 if (timer_irq_works()) {
2171 if (nmi_watchdog == NMI_IO_APIC) { 2219 if (nmi_watchdog == NMI_IO_APIC) {
2172 disable_8259A_irq(0);
2173 setup_nmi(); 2220 setup_nmi();
2174 enable_8259A_irq(0); 2221 enable_8259A_irq(0);
2175 } 2222 }
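
[Editor's aside] The new no_pin1 logic in the hunk above boils down to a small symmetric fallback: if the MP table reported only one of the two candidate routes for IRQ0, the other slot is filled in from it so both probes still run. Reduced to a standalone helper for clarity (a sketch, not the patch's code):

#include <stdio.h>

/* pin1/apic1: IRQ0 wired straight to the I/O APIC (mp_INT entry);
 * pin2/apic2: IRQ0 arriving through the 8259A cascade. */
static int pick_timer_pins(int *pin1, int *apic1, int *pin2, int *apic2)
{
	int no_pin1 = 0;

	if (*pin1 == -1) {		/* only the 8259A route is known */
		*pin1 = *pin2;
		*apic1 = *apic2;
		no_pin1 = 1;		/* pin1 still needs to be set up */
	} else if (*pin2 == -1) {	/* only the direct route is known */
		*pin2 = *pin1;
		*apic2 = *apic1;
	}
	return no_pin1;
}

int main(void)
{
	int pin1 = -1, apic1 = -1, pin2 = 0, apic2 = 0;
	int no_pin1 = pick_timer_pins(&pin1, &apic1, &pin2, &apic2);

	printf("pin1=%d apic1=%d no_pin1=%d\n", pin1, apic1, no_pin1);
	return 0;
}
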
@@ -2178,45 +2225,47 @@ static inline void __init check_timer(void)
2178 goto out; 2225 goto out;
2179 } 2226 }
2180 clear_IO_APIC_pin(apic1, pin1); 2227 clear_IO_APIC_pin(apic1, pin1);
2181 printk(KERN_ERR "..MP-BIOS bug: 8254 timer not connected to " 2228 if (!no_pin1)
2182 "IO-APIC\n"); 2229 printk(KERN_ERR "..MP-BIOS bug: "
2183 } 2230 "8254 timer not connected to IO-APIC\n");
2184 2231
2185 printk(KERN_INFO "...trying to set up timer (IRQ0) through the 8259A ... "); 2232 printk(KERN_INFO "...trying to set up timer (IRQ0) "
2186 if (pin2 != -1) { 2233 "through the 8259A ... ");
2187 printk("\n..... (found pin %d) ...", pin2); 2234 printk("\n..... (found pin %d) ...", pin2);
2188 /* 2235 /*
2189 * legacy devices should be connected to IO APIC #0 2236 * legacy devices should be connected to IO APIC #0
2190 */ 2237 */
2191 setup_ExtINT_IRQ0_pin(apic2, pin2, vector); 2238 replace_pin_at_irq(0, apic1, pin1, apic2, pin2);
2239 setup_timer_IRQ0_pin(apic2, pin2, vector);
2240 unmask_IO_APIC_irq(0);
2241 enable_8259A_irq(0);
2192 if (timer_irq_works()) { 2242 if (timer_irq_works()) {
2193 printk("works.\n"); 2243 printk("works.\n");
2194 if (pin1 != -1) 2244 timer_through_8259 = 1;
2195 replace_pin_at_irq(0, apic1, pin1, apic2, pin2);
2196 else
2197 add_pin_to_irq(0, apic2, pin2);
2198 if (nmi_watchdog == NMI_IO_APIC) { 2245 if (nmi_watchdog == NMI_IO_APIC) {
2246 disable_8259A_irq(0);
2199 setup_nmi(); 2247 setup_nmi();
2248 enable_8259A_irq(0);
2200 } 2249 }
2201 goto out; 2250 goto out;
2202 } 2251 }
2203 /* 2252 /*
2204 * Cleanup, just in case ... 2253 * Cleanup, just in case ...
2205 */ 2254 */
2255 disable_8259A_irq(0);
2206 clear_IO_APIC_pin(apic2, pin2); 2256 clear_IO_APIC_pin(apic2, pin2);
2257 printk(" failed.\n");
2207 } 2258 }
2208 printk(" failed.\n");
2209 2259
2210 if (nmi_watchdog == NMI_IO_APIC) { 2260 if (nmi_watchdog == NMI_IO_APIC) {
2211 printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n"); 2261 printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n");
2212 nmi_watchdog = 0; 2262 nmi_watchdog = NMI_NONE;
2213 } 2263 }
2264 timer_ack = 0;
2214 2265
2215 printk(KERN_INFO "...trying to set up timer as Virtual Wire IRQ..."); 2266 printk(KERN_INFO "...trying to set up timer as Virtual Wire IRQ...");
2216 2267
2217 disable_8259A_irq(0); 2268 lapic_register_intr(0, vector);
2218 set_irq_chip_and_handler_name(0, &lapic_chip, handle_fasteoi_irq,
2219 "fasteoi");
2220 apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */ 2269 apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */
2221 enable_8259A_irq(0); 2270 enable_8259A_irq(0);
2222 2271
@@ -2224,12 +2273,12 @@ static inline void __init check_timer(void)
2224 printk(" works.\n"); 2273 printk(" works.\n");
2225 goto out; 2274 goto out;
2226 } 2275 }
2276 disable_8259A_irq(0);
2227 apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector); 2277 apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector);
2228 printk(" failed.\n"); 2278 printk(" failed.\n");
2229 2279
2230 printk(KERN_INFO "...trying to set up timer as ExtINT IRQ..."); 2280 printk(KERN_INFO "...trying to set up timer as ExtINT IRQ...");
2231 2281
2232 timer_ack = 0;
2233 init_8259A(0); 2282 init_8259A(0);
2234 make_8259A_irq(0); 2283 make_8259A_irq(0);
2235 apic_write_around(APIC_LVT0, APIC_DM_EXTINT); 2284 apic_write_around(APIC_LVT0, APIC_DM_EXTINT);
@@ -2248,11 +2297,21 @@ out:
2248} 2297}
2249 2298
2250/* 2299/*
2251 * 2300 * Traditionally ISA IRQ2 is the cascade IRQ, and is not available
2252 * IRQ's that are handled by the PIC in the MPS IOAPIC case. 2301 * to devices. However there may be an I/O APIC pin available for
2253 * - IRQ2 is the cascade IRQ, and cannot be a io-apic IRQ. 2302 * this interrupt regardless. The pin may be left unconnected, but
2254 * Linux doesn't really care, as it's not actually used 2303 * typically it will be reused as an ExtINT cascade interrupt for
2255 * for any interrupt handling anyway. 2304 * the master 8259A. In the MPS case such a pin will normally be
2305 * reported as an ExtINT interrupt in the MP table. With ACPI
2306 * there is no provision for ExtINT interrupts, and in the absence
2307 * of an override it would be treated as an ordinary ISA I/O APIC
2308 * interrupt, that is edge-triggered and unmasked by default. We
2309 * used to do this, but it caused problems on some systems because
2310 * of the NMI watchdog and sometimes IRQ0 of the 8254 timer using
2311 * the same ExtINT cascade interrupt to drive the local APIC of the
2312 * bootstrap processor. Therefore we refrain from routing IRQ2 to
2313 * the I/O APIC in all cases now. No actual device should request
2314 * it anyway. --macro
2256 */ 2315 */
2257#define PIC_IRQS (1 << PIC_CASCADE_IR) 2316#define PIC_IRQS (1 << PIC_CASCADE_IR)
2258 2317
@@ -2261,15 +2320,12 @@ void __init setup_IO_APIC(void)
2261 int i; 2320 int i;
2262 2321
2263 /* Reserve all the system vectors. */ 2322 /* Reserve all the system vectors. */
2264 for (i = FIRST_SYSTEM_VECTOR; i < NR_VECTORS; i++) 2323 for (i = first_system_vector; i < NR_VECTORS; i++)
2265 set_bit(i, used_vectors); 2324 set_bit(i, used_vectors);
2266 2325
2267 enable_IO_APIC(); 2326 enable_IO_APIC();
2268 2327
2269 if (acpi_ioapic) 2328 io_apic_irqs = ~PIC_IRQS;
2270 io_apic_irqs = ~0; /* all IRQs go through IOAPIC */
2271 else
2272 io_apic_irqs = ~PIC_IRQS;
2273 2329
2274 printk("ENABLING IO-APIC IRQs\n"); 2330 printk("ENABLING IO-APIC IRQs\n");
2275 2331
@@ -2286,28 +2342,14 @@ void __init setup_IO_APIC(void)
2286 print_IO_APIC(); 2342 print_IO_APIC();
2287} 2343}
2288 2344
2289static int __init setup_disable_8254_timer(char *s)
2290{
2291 timer_over_8254 = -1;
2292 return 1;
2293}
2294static int __init setup_enable_8254_timer(char *s)
2295{
2296 timer_over_8254 = 2;
2297 return 1;
2298}
2299
2300__setup("disable_8254_timer", setup_disable_8254_timer);
2301__setup("enable_8254_timer", setup_enable_8254_timer);
2302
2303/* 2345/*
2304 * Called after all the initialization is done. If we didn't find any 2346 * Called after all the initialization is done. If we didn't find any
2305 * APIC bugs, then we can allow the modify fast path 2347 * APIC bugs, then we can allow the modify fast path
2306 */ 2348 */
2307 2349
2308static int __init io_apic_bug_finalize(void) 2350static int __init io_apic_bug_finalize(void)
2309{ 2351{
2310 if(sis_apic_bug == -1) 2352 if (sis_apic_bug == -1)
2311 sis_apic_bug = 0; 2353 sis_apic_bug = 0;
2312 return 0; 2354 return 0;
2313} 2355}
@@ -2318,17 +2360,17 @@ struct sysfs_ioapic_data {
2318 struct sys_device dev; 2360 struct sys_device dev;
2319 struct IO_APIC_route_entry entry[0]; 2361 struct IO_APIC_route_entry entry[0];
2320}; 2362};
2321static struct sysfs_ioapic_data * mp_ioapic_data[MAX_IO_APICS]; 2363static struct sysfs_ioapic_data *mp_ioapic_data[MAX_IO_APICS];
2322 2364
2323static int ioapic_suspend(struct sys_device *dev, pm_message_t state) 2365static int ioapic_suspend(struct sys_device *dev, pm_message_t state)
2324{ 2366{
2325 struct IO_APIC_route_entry *entry; 2367 struct IO_APIC_route_entry *entry;
2326 struct sysfs_ioapic_data *data; 2368 struct sysfs_ioapic_data *data;
2327 int i; 2369 int i;
2328 2370
2329 data = container_of(dev, struct sysfs_ioapic_data, dev); 2371 data = container_of(dev, struct sysfs_ioapic_data, dev);
2330 entry = data->entry; 2372 entry = data->entry;
2331 for (i = 0; i < nr_ioapic_registers[dev->id]; i ++) 2373 for (i = 0; i < nr_ioapic_registers[dev->id]; i++)
2332 entry[i] = ioapic_read_entry(dev->id, i); 2374 entry[i] = ioapic_read_entry(dev->id, i);
2333 2375
2334 return 0; 2376 return 0;
@@ -2341,18 +2383,18 @@ static int ioapic_resume(struct sys_device *dev)
2341 unsigned long flags; 2383 unsigned long flags;
2342 union IO_APIC_reg_00 reg_00; 2384 union IO_APIC_reg_00 reg_00;
2343 int i; 2385 int i;
2344 2386
2345 data = container_of(dev, struct sysfs_ioapic_data, dev); 2387 data = container_of(dev, struct sysfs_ioapic_data, dev);
2346 entry = data->entry; 2388 entry = data->entry;
2347 2389
2348 spin_lock_irqsave(&ioapic_lock, flags); 2390 spin_lock_irqsave(&ioapic_lock, flags);
2349 reg_00.raw = io_apic_read(dev->id, 0); 2391 reg_00.raw = io_apic_read(dev->id, 0);
2350 if (reg_00.bits.ID != mp_ioapics[dev->id].mpc_apicid) { 2392 if (reg_00.bits.ID != mp_ioapics[dev->id].mp_apicid) {
2351 reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid; 2393 reg_00.bits.ID = mp_ioapics[dev->id].mp_apicid;
2352 io_apic_write(dev->id, 0, reg_00.raw); 2394 io_apic_write(dev->id, 0, reg_00.raw);
2353 } 2395 }
2354 spin_unlock_irqrestore(&ioapic_lock, flags); 2396 spin_unlock_irqrestore(&ioapic_lock, flags);
2355 for (i = 0; i < nr_ioapic_registers[dev->id]; i ++) 2397 for (i = 0; i < nr_ioapic_registers[dev->id]; i++)
2356 ioapic_write_entry(dev->id, i, entry[i]); 2398 ioapic_write_entry(dev->id, i, entry[i]);
2357 2399
2358 return 0; 2400 return 0;
@@ -2366,24 +2408,23 @@ static struct sysdev_class ioapic_sysdev_class = {
2366 2408
2367static int __init ioapic_init_sysfs(void) 2409static int __init ioapic_init_sysfs(void)
2368{ 2410{
2369 struct sys_device * dev; 2411 struct sys_device *dev;
2370 int i, size, error = 0; 2412 int i, size, error = 0;
2371 2413
2372 error = sysdev_class_register(&ioapic_sysdev_class); 2414 error = sysdev_class_register(&ioapic_sysdev_class);
2373 if (error) 2415 if (error)
2374 return error; 2416 return error;
2375 2417
2376 for (i = 0; i < nr_ioapics; i++ ) { 2418 for (i = 0; i < nr_ioapics; i++) {
2377 size = sizeof(struct sys_device) + nr_ioapic_registers[i] 2419 size = sizeof(struct sys_device) + nr_ioapic_registers[i]
2378 * sizeof(struct IO_APIC_route_entry); 2420 * sizeof(struct IO_APIC_route_entry);
2379 mp_ioapic_data[i] = kmalloc(size, GFP_KERNEL); 2421 mp_ioapic_data[i] = kzalloc(size, GFP_KERNEL);
2380 if (!mp_ioapic_data[i]) { 2422 if (!mp_ioapic_data[i]) {
2381 printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i); 2423 printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
2382 continue; 2424 continue;
2383 } 2425 }
2384 memset(mp_ioapic_data[i], 0, size);
2385 dev = &mp_ioapic_data[i]->dev; 2426 dev = &mp_ioapic_data[i]->dev;
2386 dev->id = i; 2427 dev->id = i;
2387 dev->cls = &ioapic_sysdev_class; 2428 dev->cls = &ioapic_sysdev_class;
2388 error = sysdev_register(dev); 2429 error = sysdev_register(dev);
2389 if (error) { 2430 if (error) {
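The kzalloc() conversion in the hunk above is the standard replacement for a kmalloc()/memset() pair. A minimal userspace analogue of the two patterns, using calloc() and a hypothetical payload struct:

#include <stdlib.h>
#include <string.h>

struct ioapic_data { int id; char regs[64]; };  /* hypothetical payload */

/* Old pattern (left column): allocate, then clear by hand. */
static struct ioapic_data *alloc_old(size_t size)
{
        struct ioapic_data *p = malloc(size);
        if (p)
                memset(p, 0, size);
        return p;
}

/* New pattern (right column): one call, memory arrives zeroed. */
static struct ioapic_data *alloc_new(size_t size)
{
        return calloc(1, size);
}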
@@ -2458,7 +2499,7 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
2458 msg->address_lo = 2499 msg->address_lo =
2459 MSI_ADDR_BASE_LO | 2500 MSI_ADDR_BASE_LO |
2460 ((INT_DEST_MODE == 0) ? 2501 ((INT_DEST_MODE == 0) ?
2461 MSI_ADDR_DEST_MODE_PHYSICAL: 2502 MSI_ADDR_DEST_MODE_PHYSICAL:
2462 MSI_ADDR_DEST_MODE_LOGICAL) | 2503 MSI_ADDR_DEST_MODE_LOGICAL) |
2463 ((INT_DELIVERY_MODE != dest_LowestPrio) ? 2504 ((INT_DELIVERY_MODE != dest_LowestPrio) ?
2464 MSI_ADDR_REDIRECTION_CPU: 2505 MSI_ADDR_REDIRECTION_CPU:
@@ -2469,7 +2510,7 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
2469 MSI_DATA_TRIGGER_EDGE | 2510 MSI_DATA_TRIGGER_EDGE |
2470 MSI_DATA_LEVEL_ASSERT | 2511 MSI_DATA_LEVEL_ASSERT |
2471 ((INT_DELIVERY_MODE != dest_LowestPrio) ? 2512 ((INT_DELIVERY_MODE != dest_LowestPrio) ?
2472 MSI_DATA_DELIVERY_FIXED: 2513 MSI_DATA_DELIVERY_FIXED:
2473 MSI_DATA_DELIVERY_LOWPRI) | 2514 MSI_DATA_DELIVERY_LOWPRI) |
2474 MSI_DATA_VECTOR(vector); 2515 MSI_DATA_VECTOR(vector);
2475 } 2516 }
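For orientation, the address/data pair composed in these two hunks follows the architectural x86 MSI layout. A standalone sketch, assuming illustrative MSI_* constants modeled on the documented format (0xFEE00000 base window, destination-mode bit 2, redirection-hint bit 3, destination ID in bits 12-19), not the kernel's actual headers:

#include <stdint.h>
#include <stdio.h>

#define MSI_ADDR_BASE_LO        0xfee00000u  /* x86 MSI address window */
#define MSI_ADDR_DEST_MODE_LOG  (1u << 2)    /* 0 = physical, 1 = logical */
#define MSI_ADDR_REDIR_LOWPRI   (1u << 3)    /* redirection hint */
#define MSI_ADDR_DEST_ID(x)     (((uint32_t)(x) & 0xff) << 12)

#define MSI_DATA_TRIGGER_EDGE   (0u << 15)
#define MSI_DATA_LEVEL_ASSERT   (1u << 14)
#define MSI_DATA_VECTOR(v)      ((uint32_t)(v) & 0xff)

int main(void)
{
        uint32_t addr = MSI_ADDR_BASE_LO | MSI_ADDR_DEST_MODE_LOG |
                        MSI_ADDR_REDIR_LOWPRI | MSI_ADDR_DEST_ID(0x05);
        uint32_t data = MSI_DATA_TRIGGER_EDGE | MSI_DATA_LEVEL_ASSERT |
                        MSI_DATA_VECTOR(0x31);

        printf("MSI addr=%08x data=%08x\n", addr, data);
        return 0;
}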
@@ -2640,12 +2681,12 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
2640#endif /* CONFIG_HT_IRQ */ 2681#endif /* CONFIG_HT_IRQ */
2641 2682
2642/* -------------------------------------------------------------------------- 2683/* --------------------------------------------------------------------------
2643 ACPI-based IOAPIC Configuration 2684 ACPI-based IOAPIC Configuration
2644 -------------------------------------------------------------------------- */ 2685 -------------------------------------------------------------------------- */
2645 2686
2646#ifdef CONFIG_ACPI 2687#ifdef CONFIG_ACPI
2647 2688
2648int __init io_apic_get_unique_id (int ioapic, int apic_id) 2689int __init io_apic_get_unique_id(int ioapic, int apic_id)
2649{ 2690{
2650 union IO_APIC_reg_00 reg_00; 2691 union IO_APIC_reg_00 reg_00;
2651 static physid_mask_t apic_id_map = PHYSID_MASK_NONE; 2692 static physid_mask_t apic_id_map = PHYSID_MASK_NONE;
@@ -2654,10 +2695,10 @@ int __init io_apic_get_unique_id (int ioapic, int apic_id)
2654 int i = 0; 2695 int i = 0;
2655 2696
2656 /* 2697 /*
2657 * The P4 platform supports up to 256 APIC IDs on two separate APIC 2698 * The P4 platform supports up to 256 APIC IDs on two separate APIC
2658 * buses (one for LAPICs, one for IOAPICs), where predecessors only 2699 * buses (one for LAPICs, one for IOAPICs), where predecessors only
2659 * support up to 16 on one shared APIC bus. 2700 * support up to 16 on one shared APIC bus.
2660 * 2701 *
2661 * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full 2702 * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full
2662 * advantage of new APIC bus architecture. 2703 * advantage of new APIC bus architecture.
2663 */ 2704 */
@@ -2676,7 +2717,7 @@ int __init io_apic_get_unique_id (int ioapic, int apic_id)
2676 } 2717 }
2677 2718
2678 /* 2719 /*
2679 * Every APIC in a system must have a unique ID or we get lots of nice 2720 * Every APIC in a system must have a unique ID or we get lots of nice
2680 * 'stuck on smp_invalidate_needed IPI wait' messages. 2721 * 'stuck on smp_invalidate_needed IPI wait' messages.
2681 */ 2722 */
2682 if (check_apicid_used(apic_id_map, apic_id)) { 2723 if (check_apicid_used(apic_id_map, apic_id)) {
@@ -2693,7 +2734,7 @@ int __init io_apic_get_unique_id (int ioapic, int apic_id)
2693 "trying %d\n", ioapic, apic_id, i); 2734 "trying %d\n", ioapic, apic_id, i);
2694 2735
2695 apic_id = i; 2736 apic_id = i;
2696 } 2737 }
2697 2738
2698 tmp = apicid_to_cpu_present(apic_id); 2739 tmp = apicid_to_cpu_present(apic_id);
2699 physids_or(apic_id_map, apic_id_map, tmp); 2740 physids_or(apic_id_map, apic_id_map, tmp);
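io_apic_get_unique_id() keeps APIC IDs unique by recording used IDs in a physid bitmap and scanning for a free slot on collision. A simplified model of that policy, with a plain bool array standing in for physid_mask_t:

#include <stdbool.h>
#include <stdio.h>

#define MAX_APICS 256

static bool apic_id_used[MAX_APICS];   /* stand-in for the physid bitmap */

static int get_unique_apic_id(int wanted)
{
        int i;

        if (!apic_id_used[wanted]) {
                apic_id_used[wanted] = true;
                return wanted;
        }
        /* Collision: fall back to the first free ID, as the kernel does. */
        for (i = 0; i < MAX_APICS; i++) {
                if (!apic_id_used[i]) {
                        printf("APIC ID %d already used, trying %d\n",
                               wanted, i);
                        apic_id_used[i] = true;
                        return i;
                }
        }
        return -1;      /* no free IDs left */
}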
@@ -2720,7 +2761,7 @@ int __init io_apic_get_unique_id (int ioapic, int apic_id)
2720} 2761}
2721 2762
2722 2763
2723int __init io_apic_get_version (int ioapic) 2764int __init io_apic_get_version(int ioapic)
2724{ 2765{
2725 union IO_APIC_reg_01 reg_01; 2766 union IO_APIC_reg_01 reg_01;
2726 unsigned long flags; 2767 unsigned long flags;
@@ -2733,7 +2774,7 @@ int __init io_apic_get_version (int ioapic)
2733} 2774}
2734 2775
2735 2776
2736int __init io_apic_get_redir_entries (int ioapic) 2777int __init io_apic_get_redir_entries(int ioapic)
2737{ 2778{
2738 union IO_APIC_reg_01 reg_01; 2779 union IO_APIC_reg_01 reg_01;
2739 unsigned long flags; 2780 unsigned long flags;
@@ -2746,7 +2787,7 @@ int __init io_apic_get_redir_entries (int ioapic)
2746} 2787}
2747 2788
2748 2789
2749int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int active_high_low) 2790int io_apic_set_pci_routing(int ioapic, int pin, int irq, int edge_level, int active_high_low)
2750{ 2791{
2751 struct IO_APIC_route_entry entry; 2792 struct IO_APIC_route_entry entry;
2752 2793
@@ -2762,7 +2803,7 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int a
2762 * corresponding device driver registers for this IRQ. 2803 * corresponding device driver registers for this IRQ.
2763 */ 2804 */
2764 2805
2765 memset(&entry,0,sizeof(entry)); 2806 memset(&entry, 0, sizeof(entry));
2766 2807
2767 entry.delivery_mode = INT_DELIVERY_MODE; 2808 entry.delivery_mode = INT_DELIVERY_MODE;
2768 entry.dest_mode = INT_DEST_MODE; 2809 entry.dest_mode = INT_DEST_MODE;
@@ -2781,7 +2822,7 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int a
2781 2822
2782 apic_printk(APIC_DEBUG, KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry " 2823 apic_printk(APIC_DEBUG, KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry "
2783 "(%d-%d -> 0x%x -> IRQ %d Mode:%i Active:%i)\n", ioapic, 2824 "(%d-%d -> 0x%x -> IRQ %d Mode:%i Active:%i)\n", ioapic,
2784 mp_ioapics[ioapic].mpc_apicid, pin, entry.vector, irq, 2825 mp_ioapics[ioapic].mp_apicid, pin, entry.vector, irq,
2785 edge_level, active_high_low); 2826 edge_level, active_high_low);
2786 2827
2787 ioapic_register_intr(irq, entry.vector, edge_level); 2828 ioapic_register_intr(irq, entry.vector, edge_level);
@@ -2802,8 +2843,8 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
2802 return -1; 2843 return -1;
2803 2844
2804 for (i = 0; i < mp_irq_entries; i++) 2845 for (i = 0; i < mp_irq_entries; i++)
2805 if (mp_irqs[i].mpc_irqtype == mp_INT && 2846 if (mp_irqs[i].mp_irqtype == mp_INT &&
2806 mp_irqs[i].mpc_srcbusirq == bus_irq) 2847 mp_irqs[i].mp_srcbusirq == bus_irq)
2807 break; 2848 break;
2808 if (i >= mp_irq_entries) 2849 if (i >= mp_irq_entries)
2809 return -1; 2850 return -1;
@@ -2836,3 +2877,34 @@ static int __init parse_noapic(char *arg)
2836 return 0; 2877 return 0;
2837} 2878}
2838early_param("noapic", parse_noapic); 2879early_param("noapic", parse_noapic);
2880
2881void __init ioapic_init_mappings(void)
2882{
2883 unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
2884 int i;
2885
2886 for (i = 0; i < nr_ioapics; i++) {
2887 if (smp_found_config) {
2888 ioapic_phys = mp_ioapics[i].mp_apicaddr;
2889 if (!ioapic_phys) {
2890 printk(KERN_ERR
2891 "WARNING: bogus zero IO-APIC "
2892 "address found in MPTABLE, "
2893 "disabling IO/APIC support!\n");
2894 smp_found_config = 0;
2895 skip_ioapic_setup = 1;
2896 goto fake_ioapic_page;
2897 }
2898 } else {
2899fake_ioapic_page:
2900 ioapic_phys = (unsigned long)
2901 alloc_bootmem_pages(PAGE_SIZE);
2902 ioapic_phys = __pa(ioapic_phys);
2903 }
2904 set_fixmap_nocache(idx, ioapic_phys);
2905 printk(KERN_DEBUG "mapped IOAPIC to %08lx (%08lx)\n",
2906 __fix_to_virt(idx), ioapic_phys);
2907 idx++;
2908 }
2909}
2910
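The new ioapic_init_mappings() maps each controller's register window through a fixmap slot, substituting a freshly allocated page whenever the MP table reports a bogus zero address so later accesses hit harmless memory. A rough userspace model of that fallback, with map_registers() as a hypothetical stand-in for set_fixmap_nocache():

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define NR_CHIPS 2

static unsigned long chip_addr[NR_CHIPS] = { 0xfec00000UL, 0 };

static void map_registers(int idx, unsigned long phys)
{
        printf("mapped controller %d at %#lx\n", idx, phys);
}

int main(void)
{
        for (int i = 0; i < NR_CHIPS; i++) {
                unsigned long phys = chip_addr[i];

                if (!phys)      /* bogus zero address: use a dummy page */
                        phys = (unsigned long)(uintptr_t)calloc(1, 4096);
                map_registers(i, phys);
        }
        return 0;
}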
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index ef1a8dfcc529..9e645cba11c4 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -61,7 +61,7 @@ struct irq_cfg {
61}; 61};
62 62
63/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ 63/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
64struct irq_cfg irq_cfg[NR_IRQS] __read_mostly = { 64static struct irq_cfg irq_cfg[NR_IRQS] __read_mostly = {
65 [0] = { .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR, }, 65 [0] = { .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR, },
66 [1] = { .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR, }, 66 [1] = { .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR, },
67 [2] = { .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR, }, 67 [2] = { .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR, },
@@ -82,6 +82,10 @@ struct irq_cfg irq_cfg[NR_IRQS] __read_mostly = {
82 82
83static int assign_irq_vector(int irq, cpumask_t mask); 83static int assign_irq_vector(int irq, cpumask_t mask);
84 84
85int first_system_vector = 0xfe;
86
87char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE};
88
85#define __apicdebuginit __init 89#define __apicdebuginit __init
86 90
87int sis_apic_bug; /* not actually supported, dummy for compile */ 91int sis_apic_bug; /* not actually supported, dummy for compile */
@@ -90,7 +94,14 @@ static int no_timer_check;
90 94
91static int disable_timer_pin_1 __initdata; 95static int disable_timer_pin_1 __initdata;
92 96
93int timer_over_8254 __initdata = 1; 97static bool mask_ioapic_irq_2 __initdata;
98
99void __init force_mask_ioapic_irq_2(void)
100{
101 mask_ioapic_irq_2 = true;
102}
103
104int timer_through_8259 __initdata;
94 105
95/* Where if anywhere is the i8259 connect in external int mode */ 106/* Where if anywhere is the i8259 connect in external int mode */
96static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; 107static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
@@ -104,15 +115,17 @@ DEFINE_SPINLOCK(vector_lock);
104int nr_ioapic_registers[MAX_IO_APICS]; 115int nr_ioapic_registers[MAX_IO_APICS];
105 116
106/* I/O APIC entries */ 117/* I/O APIC entries */
107struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS]; 118struct mp_config_ioapic mp_ioapics[MAX_IO_APICS];
108int nr_ioapics; 119int nr_ioapics;
109 120
110/* MP IRQ source entries */ 121/* MP IRQ source entries */
111struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; 122struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
112 123
113/* # of MP IRQ source entries */ 124/* # of MP IRQ source entries */
114int mp_irq_entries; 125int mp_irq_entries;
115 126
127DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
128
116/* 129/*
117 * Rough estimation of how many shared IRQs there are, can 130 * Rough estimation of how many shared IRQs there are, can
118 * be changed anytime. 131 * be changed anytime.
@@ -140,7 +153,7 @@ struct io_apic {
140static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx) 153static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
141{ 154{
142 return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx) 155 return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx)
143 + (mp_ioapics[idx].mpc_apicaddr & ~PAGE_MASK); 156 + (mp_ioapics[idx].mp_apicaddr & ~PAGE_MASK);
144} 157}
145 158
146static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) 159static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
@@ -183,7 +196,7 @@ static bool io_apic_level_ack_pending(unsigned int irq)
183 break; 196 break;
184 reg = io_apic_read(entry->apic, 0x10 + pin*2); 197 reg = io_apic_read(entry->apic, 0x10 + pin*2);
185 /* Is the remote IRR bit set? */ 198 /* Is the remote IRR bit set? */
186 if ((reg >> 14) & 1) { 199 if (reg & IO_APIC_REDIR_REMOTE_IRR) {
187 spin_unlock_irqrestore(&ioapic_lock, flags); 200 spin_unlock_irqrestore(&ioapic_lock, flags);
188 return true; 201 return true;
189 } 202 }
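The switch from (reg >> 14) & 1 to a named constant is behavior-preserving; the old shift itself pins the remote-IRR flag to bit 14 of the redirection entry. The named-mask form in isolation:

#include <stdbool.h>
#include <stdint.h>

#define IO_APIC_REDIR_REMOTE_IRR (1u << 14)   /* bit 14, per the old shift */

static bool level_ack_pending(uint32_t reg)
{
        /* Equivalent to the old ((reg >> 14) & 1), but self-documenting. */
        return reg & IO_APIC_REDIR_REMOTE_IRR;
}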
@@ -298,7 +311,7 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
298 break; 311 break;
299 io_apic_write(apic, 0x11 + pin*2, dest); 312 io_apic_write(apic, 0x11 + pin*2, dest);
300 reg = io_apic_read(apic, 0x10 + pin*2); 313 reg = io_apic_read(apic, 0x10 + pin*2);
301 reg &= ~0x000000ff; 314 reg &= ~IO_APIC_REDIR_VECTOR_MASK;
302 reg |= vector; 315 reg |= vector;
303 io_apic_modify(apic, reg); 316 io_apic_modify(apic, reg);
304 if (!entry->next) 317 if (!entry->next)
@@ -360,16 +373,37 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin)
360 entry->pin = pin; 373 entry->pin = pin;
361} 374}
362 375
376/*
377 * Reroute an IRQ to a different pin.
378 */
379static void __init replace_pin_at_irq(unsigned int irq,
380 int oldapic, int oldpin,
381 int newapic, int newpin)
382{
383 struct irq_pin_list *entry = irq_2_pin + irq;
384
385 while (1) {
386 if (entry->apic == oldapic && entry->pin == oldpin) {
387 entry->apic = newapic;
388 entry->pin = newpin;
389 }
390 if (!entry->next)
391 break;
392 entry = irq_2_pin + entry->next;
393 }
394}
395
363 396
364#define DO_ACTION(name,R,ACTION, FINAL) \ 397#define DO_ACTION(name,R,ACTION, FINAL) \
365 \ 398 \
366 static void name##_IO_APIC_irq (unsigned int irq) \ 399 static void name##_IO_APIC_irq (unsigned int irq) \
367 __DO_ACTION(R, ACTION, FINAL) 400 __DO_ACTION(R, ACTION, FINAL)
368 401
369DO_ACTION( __mask, 0, |= 0x00010000, io_apic_sync(entry->apic) ) 402/* mask = 1 */
370 /* mask = 1 */ 403DO_ACTION(__mask, 0, |= IO_APIC_REDIR_MASKED, io_apic_sync(entry->apic))
371DO_ACTION( __unmask, 0, &= 0xfffeffff, ) 404
372 /* mask = 0 */ 405/* mask = 0 */
406DO_ACTION(__unmask, 0, &= ~IO_APIC_REDIR_MASKED, )
373 407
374static void mask_IO_APIC_irq (unsigned int irq) 408static void mask_IO_APIC_irq (unsigned int irq)
375{ 409{
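Likewise, the old literals 0x00010000 and 0xfffeffff both address bit 16 (the mask bit) of the redirection entry, and 0x000000ff is the vector field, so the named-constant rewrite changes no behavior. A compact sketch of the resulting helpers:

#include <stdint.h>

#define IO_APIC_REDIR_MASKED       (1u << 16)    /* bit 16, per 0x00010000 */
#define IO_APIC_REDIR_VECTOR_MASK  0x000000ffu

static uint32_t redir_mask(uint32_t reg)    { return reg | IO_APIC_REDIR_MASKED; }
static uint32_t redir_unmask(uint32_t reg)  { return reg & ~IO_APIC_REDIR_MASKED; }

static uint32_t redir_set_vector(uint32_t reg, uint8_t vector)
{
        return (reg & ~IO_APIC_REDIR_VECTOR_MASK) | vector;
}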
@@ -430,20 +464,6 @@ static int __init disable_timer_pin_setup(char *arg)
430} 464}
431__setup("disable_timer_pin_1", disable_timer_pin_setup); 465__setup("disable_timer_pin_1", disable_timer_pin_setup);
432 466
433static int __init setup_disable_8254_timer(char *s)
434{
435 timer_over_8254 = -1;
436 return 1;
437}
438static int __init setup_enable_8254_timer(char *s)
439{
440 timer_over_8254 = 2;
441 return 1;
442}
443
444__setup("disable_8254_timer", setup_disable_8254_timer);
445__setup("enable_8254_timer", setup_enable_8254_timer);
446
447 467
448/* 468/*
449 * Find the IRQ entry number of a certain pin. 469 * Find the IRQ entry number of a certain pin.
@@ -453,10 +473,10 @@ static int find_irq_entry(int apic, int pin, int type)
453 int i; 473 int i;
454 474
455 for (i = 0; i < mp_irq_entries; i++) 475 for (i = 0; i < mp_irq_entries; i++)
456 if (mp_irqs[i].mpc_irqtype == type && 476 if (mp_irqs[i].mp_irqtype == type &&
457 (mp_irqs[i].mpc_dstapic == mp_ioapics[apic].mpc_apicid || 477 (mp_irqs[i].mp_dstapic == mp_ioapics[apic].mp_apicid ||
458 mp_irqs[i].mpc_dstapic == MP_APIC_ALL) && 478 mp_irqs[i].mp_dstapic == MP_APIC_ALL) &&
459 mp_irqs[i].mpc_dstirq == pin) 479 mp_irqs[i].mp_dstirq == pin)
460 return i; 480 return i;
461 481
462 return -1; 482 return -1;
@@ -470,13 +490,13 @@ static int __init find_isa_irq_pin(int irq, int type)
470 int i; 490 int i;
471 491
472 for (i = 0; i < mp_irq_entries; i++) { 492 for (i = 0; i < mp_irq_entries; i++) {
473 int lbus = mp_irqs[i].mpc_srcbus; 493 int lbus = mp_irqs[i].mp_srcbus;
474 494
475 if (test_bit(lbus, mp_bus_not_pci) && 495 if (test_bit(lbus, mp_bus_not_pci) &&
476 (mp_irqs[i].mpc_irqtype == type) && 496 (mp_irqs[i].mp_irqtype == type) &&
477 (mp_irqs[i].mpc_srcbusirq == irq)) 497 (mp_irqs[i].mp_srcbusirq == irq))
478 498
479 return mp_irqs[i].mpc_dstirq; 499 return mp_irqs[i].mp_dstirq;
480 } 500 }
481 return -1; 501 return -1;
482} 502}
@@ -486,17 +506,17 @@ static int __init find_isa_irq_apic(int irq, int type)
486 int i; 506 int i;
487 507
488 for (i = 0; i < mp_irq_entries; i++) { 508 for (i = 0; i < mp_irq_entries; i++) {
489 int lbus = mp_irqs[i].mpc_srcbus; 509 int lbus = mp_irqs[i].mp_srcbus;
490 510
491 if (test_bit(lbus, mp_bus_not_pci) && 511 if (test_bit(lbus, mp_bus_not_pci) &&
492 (mp_irqs[i].mpc_irqtype == type) && 512 (mp_irqs[i].mp_irqtype == type) &&
493 (mp_irqs[i].mpc_srcbusirq == irq)) 513 (mp_irqs[i].mp_srcbusirq == irq))
494 break; 514 break;
495 } 515 }
496 if (i < mp_irq_entries) { 516 if (i < mp_irq_entries) {
497 int apic; 517 int apic;
498 for(apic = 0; apic < nr_ioapics; apic++) { 518 for(apic = 0; apic < nr_ioapics; apic++) {
499 if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic) 519 if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic)
500 return apic; 520 return apic;
501 } 521 }
502 } 522 }
@@ -516,28 +536,28 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
516 536
517 apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n", 537 apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n",
518 bus, slot, pin); 538 bus, slot, pin);
519 if (mp_bus_id_to_pci_bus[bus] == -1) { 539 if (test_bit(bus, mp_bus_not_pci)) {
520 apic_printk(APIC_VERBOSE, "PCI BIOS passed nonexistent PCI bus %d!\n", bus); 540 apic_printk(APIC_VERBOSE, "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
521 return -1; 541 return -1;
522 } 542 }
523 for (i = 0; i < mp_irq_entries; i++) { 543 for (i = 0; i < mp_irq_entries; i++) {
524 int lbus = mp_irqs[i].mpc_srcbus; 544 int lbus = mp_irqs[i].mp_srcbus;
525 545
526 for (apic = 0; apic < nr_ioapics; apic++) 546 for (apic = 0; apic < nr_ioapics; apic++)
527 if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic || 547 if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic ||
528 mp_irqs[i].mpc_dstapic == MP_APIC_ALL) 548 mp_irqs[i].mp_dstapic == MP_APIC_ALL)
529 break; 549 break;
530 550
531 if (!test_bit(lbus, mp_bus_not_pci) && 551 if (!test_bit(lbus, mp_bus_not_pci) &&
532 !mp_irqs[i].mpc_irqtype && 552 !mp_irqs[i].mp_irqtype &&
533 (bus == lbus) && 553 (bus == lbus) &&
534 (slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f))) { 554 (slot == ((mp_irqs[i].mp_srcbusirq >> 2) & 0x1f))) {
535 int irq = pin_2_irq(i,apic,mp_irqs[i].mpc_dstirq); 555 int irq = pin_2_irq(i,apic,mp_irqs[i].mp_dstirq);
536 556
537 if (!(apic || IO_APIC_IRQ(irq))) 557 if (!(apic || IO_APIC_IRQ(irq)))
538 continue; 558 continue;
539 559
540 if (pin == (mp_irqs[i].mpc_srcbusirq & 3)) 560 if (pin == (mp_irqs[i].mp_srcbusirq & 3))
541 return irq; 561 return irq;
542 /* 562 /*
543 * Use the first all-but-pin matching entry as a 563 * Use the first all-but-pin matching entry as a
@@ -565,13 +585,13 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
565 585
566static int MPBIOS_polarity(int idx) 586static int MPBIOS_polarity(int idx)
567{ 587{
568 int bus = mp_irqs[idx].mpc_srcbus; 588 int bus = mp_irqs[idx].mp_srcbus;
569 int polarity; 589 int polarity;
570 590
571 /* 591 /*
572 * Determine IRQ line polarity (high active or low active): 592 * Determine IRQ line polarity (high active or low active):
573 */ 593 */
574 switch (mp_irqs[idx].mpc_irqflag & 3) 594 switch (mp_irqs[idx].mp_irqflag & 3)
575 { 595 {
576 case 0: /* conforms, ie. bus-type dependent polarity */ 596 case 0: /* conforms, ie. bus-type dependent polarity */
577 if (test_bit(bus, mp_bus_not_pci)) 597 if (test_bit(bus, mp_bus_not_pci))
@@ -607,13 +627,13 @@ static int MPBIOS_polarity(int idx)
607 627
608static int MPBIOS_trigger(int idx) 628static int MPBIOS_trigger(int idx)
609{ 629{
610 int bus = mp_irqs[idx].mpc_srcbus; 630 int bus = mp_irqs[idx].mp_srcbus;
611 int trigger; 631 int trigger;
612 632
613 /* 633 /*
614 * Determine IRQ trigger mode (edge or level sensitive): 634 * Determine IRQ trigger mode (edge or level sensitive):
615 */ 635 */
616 switch ((mp_irqs[idx].mpc_irqflag>>2) & 3) 636 switch ((mp_irqs[idx].mp_irqflag>>2) & 3)
617 { 637 {
618 case 0: /* conforms, ie. bus-type dependent */ 638 case 0: /* conforms, ie. bus-type dependent */
619 if (test_bit(bus, mp_bus_not_pci)) 639 if (test_bit(bus, mp_bus_not_pci))
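Both decoders switch on the MP-table irqflag field: bits 0-1 select polarity, bits 2-3 the trigger mode, and 0 means "conforms to the bus". A standalone decoder sketch, assuming the value encoding given in the MP specification (1 = active high/edge, 3 = active low/level, 2 reserved):

#include <stdio.h>

static const char *decode_polarity(unsigned int irqflag)
{
        switch (irqflag & 3) {
        case 0:  return "bus default";
        case 1:  return "active high";
        case 3:  return "active low";
        default: return "reserved";
        }
}

static const char *decode_trigger(unsigned int irqflag)
{
        switch ((irqflag >> 2) & 3) {
        case 0:  return "bus default";
        case 1:  return "edge";
        case 3:  return "level";
        default: return "reserved";
        }
}

int main(void)
{
        unsigned int flag = 0xf;        /* typical PCI entry */

        printf("%s, %s\n", decode_polarity(flag), decode_trigger(flag));
        return 0;
}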
@@ -660,16 +680,16 @@ static inline int irq_trigger(int idx)
660static int pin_2_irq(int idx, int apic, int pin) 680static int pin_2_irq(int idx, int apic, int pin)
661{ 681{
662 int irq, i; 682 int irq, i;
663 int bus = mp_irqs[idx].mpc_srcbus; 683 int bus = mp_irqs[idx].mp_srcbus;
664 684
665 /* 685 /*
666 * Debugging check, we are in big trouble if this message pops up! 686 * Debugging check, we are in big trouble if this message pops up!
667 */ 687 */
668 if (mp_irqs[idx].mpc_dstirq != pin) 688 if (mp_irqs[idx].mp_dstirq != pin)
669 printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n"); 689 printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
670 690
671 if (test_bit(bus, mp_bus_not_pci)) { 691 if (test_bit(bus, mp_bus_not_pci)) {
672 irq = mp_irqs[idx].mpc_srcbusirq; 692 irq = mp_irqs[idx].mp_srcbusirq;
673 } else { 693 } else {
674 /* 694 /*
675 * PCI IRQs are mapped in order 695 * PCI IRQs are mapped in order
@@ -730,7 +750,7 @@ static int __assign_irq_vector(int irq, cpumask_t mask)
730 offset = current_offset; 750 offset = current_offset;
731next: 751next:
732 vector += 8; 752 vector += 8;
733 if (vector >= FIRST_SYSTEM_VECTOR) { 753 if (vector >= first_system_vector) {
734 /* If we run out of vectors on large boxen, must share them. */ 754 /* If we run out of vectors on large boxen, must share them. */
735 offset = (offset + 1) % 8; 755 offset = (offset + 1) % 8;
736 vector = FIRST_DEVICE_VECTOR + offset; 756 vector = FIRST_DEVICE_VECTOR + offset;
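The vector allocator walks candidates in strides of 8 (one vector per hardware priority class) and, once it runs into the first system vector, rotates to the next of the eight offsets so device vectors end up shared across priority levels. A simplified model, with an illustrative FIRST_DEVICE_VECTOR rather than the kernel's real value:

#include <stdio.h>

#define FIRST_DEVICE_VECTOR 0x41        /* illustrative starting vector */

static int first_system_vector = 0xef;  /* illustrative upper bound */

static int next_vector(int vector, int *offset)
{
        vector += 8;
        if (vector >= first_system_vector) {
                /* Out of fresh vectors: restart at the next offset. */
                *offset = (*offset + 1) % 8;
                vector = FIRST_DEVICE_VECTOR + *offset;
        }
        return vector;
}

int main(void)
{
        int offset = 0, v = FIRST_DEVICE_VECTOR;

        for (int i = 0; i < 4; i++) {
                v = next_vector(v, &offset);
                printf("candidate vector %#x\n", v);
        }
        return 0;
}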
@@ -788,7 +808,7 @@ static void __clear_irq_vector(int irq)
788 cpus_clear(cfg->domain); 808 cpus_clear(cfg->domain);
789} 809}
790 810
791void __setup_vector_irq(int cpu) 811static void __setup_vector_irq(int cpu)
792{ 812{
793 /* Initialize vector_irq on a new cpu */ 813 /* Initialize vector_irq on a new cpu */
794 /* This function must be called with vector_lock held */ 814 /* This function must be called with vector_lock held */
@@ -811,6 +831,13 @@ void __setup_vector_irq(int cpu)
811 } 831 }
812} 832}
813 833
834void setup_vector_irq(int cpu)
835{
836 spin_lock(&vector_lock);
837 __setup_vector_irq(smp_processor_id());
838 spin_unlock(&vector_lock);
839}
840
814 841
815static struct irq_chip ioapic_chip; 842static struct irq_chip ioapic_chip;
816 843
@@ -846,7 +873,7 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
846 apic_printk(APIC_VERBOSE,KERN_DEBUG 873 apic_printk(APIC_VERBOSE,KERN_DEBUG
847 "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> " 874 "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> "
848 "IRQ %d Mode:%i Active:%i)\n", 875 "IRQ %d Mode:%i Active:%i)\n",
849 apic, mp_ioapics[apic].mpc_apicid, pin, cfg->vector, 876 apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector,
850 irq, trigger, polarity); 877 irq, trigger, polarity);
851 878
852 /* 879 /*
@@ -887,10 +914,10 @@ static void __init setup_IO_APIC_irqs(void)
887 idx = find_irq_entry(apic,pin,mp_INT); 914 idx = find_irq_entry(apic,pin,mp_INT);
888 if (idx == -1) { 915 if (idx == -1) {
889 if (first_notcon) { 916 if (first_notcon) {
890 apic_printk(APIC_VERBOSE, KERN_DEBUG " IO-APIC (apicid-pin) %d-%d", mp_ioapics[apic].mpc_apicid, pin); 917 apic_printk(APIC_VERBOSE, KERN_DEBUG " IO-APIC (apicid-pin) %d-%d", mp_ioapics[apic].mp_apicid, pin);
891 first_notcon = 0; 918 first_notcon = 0;
892 } else 919 } else
893 apic_printk(APIC_VERBOSE, ", %d-%d", mp_ioapics[apic].mpc_apicid, pin); 920 apic_printk(APIC_VERBOSE, ", %d-%d", mp_ioapics[apic].mp_apicid, pin);
894 continue; 921 continue;
895 } 922 }
896 if (!first_notcon) { 923 if (!first_notcon) {
@@ -911,26 +938,21 @@ static void __init setup_IO_APIC_irqs(void)
911} 938}
912 939
913/* 940/*
914 * Set up the 8259A-master output pin as broadcast to all 941 * Set up the timer pin, possibly with the 8259A-master behind.
915 * CPUs.
916 */ 942 */
917static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, int vector) 943static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin,
944 int vector)
918{ 945{
919 struct IO_APIC_route_entry entry; 946 struct IO_APIC_route_entry entry;
920 947
921 memset(&entry, 0, sizeof(entry)); 948 memset(&entry, 0, sizeof(entry));
922 949
923 disable_8259A_irq(0);
924
925 /* mask LVT0 */
926 apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
927
928 /* 950 /*
929 * We use logical delivery to get the timer IRQ 951 * We use logical delivery to get the timer IRQ
930 * to the first CPU. 952 * to the first CPU.
931 */ 953 */
932 entry.dest_mode = INT_DEST_MODE; 954 entry.dest_mode = INT_DEST_MODE;
933 entry.mask = 0; /* unmask IRQ now */ 955 entry.mask = 1; /* mask IRQ now */
934 entry.dest = cpu_mask_to_apicid(TARGET_CPUS); 956 entry.dest = cpu_mask_to_apicid(TARGET_CPUS);
935 entry.delivery_mode = INT_DELIVERY_MODE; 957 entry.delivery_mode = INT_DELIVERY_MODE;
936 entry.polarity = 0; 958 entry.polarity = 0;
@@ -939,7 +961,7 @@ static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, in
939 961
940 /* 962 /*
941 * The timer IRQ doesn't have to know that behind the 963 * The timer IRQ doesn't have to know that behind the
942 * scene we have an 8259A-master in AEOI mode ... 964 * scene we may have an 8259A-master in AEOI mode ...
943 */ 965 */
944 set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge"); 966 set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge");
945 967
@@ -947,8 +969,6 @@ static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, in
947 * Add it to the IO-APIC irq-routing table: 969 * Add it to the IO-APIC irq-routing table:
948 */ 970 */
949 ioapic_write_entry(apic, pin, entry); 971 ioapic_write_entry(apic, pin, entry);
950
951 enable_8259A_irq(0);
952} 972}
953 973
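setup_timer_IRQ0_pin() fills an IO_APIC_route_entry field by field, now leaving the pin masked until check_timer() explicitly unmasks it. For orientation, a sketch of the low 32 bits of a redirection entry following the 82093AA datasheet layout (C bitfield ordering is compiler-dependent, so real code masks and shifts instead):

#include <stdint.h>

struct route_entry_lo {
        uint32_t vector          : 8;   /* interrupt vector */
        uint32_t delivery_mode   : 3;   /* fixed, lowest-prio, ExtINT, ... */
        uint32_t dest_mode       : 1;   /* 0 = physical, 1 = logical */
        uint32_t delivery_status : 1;   /* read-only: send pending */
        uint32_t polarity        : 1;   /* 0 = active high */
        uint32_t irr             : 1;   /* remote IRR, level mode only */
        uint32_t trigger         : 1;   /* 0 = edge, 1 = level */
        uint32_t mask            : 1;   /* 1 = interrupt masked */
        uint32_t reserved        : 15;
};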
954void __apicdebuginit print_IO_APIC(void) 974void __apicdebuginit print_IO_APIC(void)
@@ -965,7 +985,7 @@ void __apicdebuginit print_IO_APIC(void)
965 printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); 985 printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
966 for (i = 0; i < nr_ioapics; i++) 986 for (i = 0; i < nr_ioapics; i++)
967 printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", 987 printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
968 mp_ioapics[i].mpc_apicid, nr_ioapic_registers[i]); 988 mp_ioapics[i].mp_apicid, nr_ioapic_registers[i]);
969 989
970 /* 990 /*
971 * We are a bit conservative about what we expect. We have to 991 * We are a bit conservative about what we expect. We have to
@@ -983,7 +1003,7 @@ void __apicdebuginit print_IO_APIC(void)
983 spin_unlock_irqrestore(&ioapic_lock, flags); 1003 spin_unlock_irqrestore(&ioapic_lock, flags);
984 1004
985 printk("\n"); 1005 printk("\n");
986 printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid); 1006 printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid);
987 printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); 1007 printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
988 printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); 1008 printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID);
989 1009
@@ -1077,6 +1097,7 @@ void __apicdebuginit print_local_APIC(void * dummy)
1077 1097
1078 printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n", 1098 printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
1079 smp_processor_id(), hard_smp_processor_id()); 1099 smp_processor_id(), hard_smp_processor_id());
1100 v = apic_read(APIC_ID);
1080 printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, GET_APIC_ID(read_apic_id())); 1101 printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, GET_APIC_ID(read_apic_id()));
1081 v = apic_read(APIC_LVR); 1102 v = apic_read(APIC_LVR);
1082 printk(KERN_INFO "... APIC VERSION: %08x\n", v); 1103 printk(KERN_INFO "... APIC VERSION: %08x\n", v);
@@ -1540,7 +1561,7 @@ static inline void init_IO_APIC_traps(void)
1540 } 1561 }
1541} 1562}
1542 1563
1543static void enable_lapic_irq (unsigned int irq) 1564static void unmask_lapic_irq(unsigned int irq)
1544{ 1565{
1545 unsigned long v; 1566 unsigned long v;
1546 1567
@@ -1548,7 +1569,7 @@ static void enable_lapic_irq (unsigned int irq)
1548 apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED); 1569 apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED);
1549} 1570}
1550 1571
1551static void disable_lapic_irq (unsigned int irq) 1572static void mask_lapic_irq(unsigned int irq)
1552{ 1573{
1553 unsigned long v; 1574 unsigned long v;
1554 1575
@@ -1561,19 +1582,20 @@ static void ack_lapic_irq (unsigned int irq)
1561 ack_APIC_irq(); 1582 ack_APIC_irq();
1562} 1583}
1563 1584
1564static void end_lapic_irq (unsigned int i) { /* nothing */ } 1585static struct irq_chip lapic_chip __read_mostly = {
1565 1586 .name = "local-APIC",
1566static struct hw_interrupt_type lapic_irq_type __read_mostly = { 1587 .mask = mask_lapic_irq,
1567 .name = "local-APIC", 1588 .unmask = unmask_lapic_irq,
1568 .typename = "local-APIC-edge", 1589 .ack = ack_lapic_irq,
1569 .startup = NULL, /* startup_irq() not used for IRQ0 */
1570 .shutdown = NULL, /* shutdown_irq() not used for IRQ0 */
1571 .enable = enable_lapic_irq,
1572 .disable = disable_lapic_irq,
1573 .ack = ack_lapic_irq,
1574 .end = end_lapic_irq,
1575}; 1590};
1576 1591
1592static void lapic_register_intr(int irq)
1593{
1594 irq_desc[irq].status &= ~IRQ_LEVEL;
1595 set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
1596 "edge");
1597}
1598
1577static void __init setup_nmi(void) 1599static void __init setup_nmi(void)
1578{ 1600{
1579 /* 1601 /*
@@ -1659,6 +1681,7 @@ static inline void __init check_timer(void)
1659 struct irq_cfg *cfg = irq_cfg + 0; 1681 struct irq_cfg *cfg = irq_cfg + 0;
1660 int apic1, pin1, apic2, pin2; 1682 int apic1, pin1, apic2, pin2;
1661 unsigned long flags; 1683 unsigned long flags;
1684 int no_pin1 = 0;
1662 1685
1663 local_irq_save(flags); 1686 local_irq_save(flags);
1664 1687
@@ -1669,16 +1692,11 @@ static inline void __init check_timer(void)
1669 assign_irq_vector(0, TARGET_CPUS); 1692 assign_irq_vector(0, TARGET_CPUS);
1670 1693
1671 /* 1694 /*
1672 * Subtle, code in do_timer_interrupt() expects an AEOI 1695 * As IRQ0 is to be enabled in the 8259A, the virtual
1673 * mode for the 8259A whenever interrupts are routed 1696 * wire has to be disabled in the local APIC.
1674 * through I/O APICs. Also IRQ0 has to be enabled in
1675 * the 8259A which implies the virtual wire has to be
1676 * disabled in the local APIC.
1677 */ 1697 */
1678 apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); 1698 apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
1679 init_8259A(1); 1699 init_8259A(1);
1680 if (timer_over_8254 > 0)
1681 enable_8259A_irq(0);
1682 1700
1683 pin1 = find_isa_irq_pin(0, mp_INT); 1701 pin1 = find_isa_irq_pin(0, mp_INT);
1684 apic1 = find_isa_irq_apic(0, mp_INT); 1702 apic1 = find_isa_irq_apic(0, mp_INT);
@@ -1688,15 +1706,36 @@ static inline void __init check_timer(void)
1688 apic_printk(APIC_VERBOSE,KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n", 1706 apic_printk(APIC_VERBOSE,KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n",
1689 cfg->vector, apic1, pin1, apic2, pin2); 1707 cfg->vector, apic1, pin1, apic2, pin2);
1690 1708
1709 if (mask_ioapic_irq_2)
1710 mask_IO_APIC_irq(2);
1711
1712 /*
1713 * Some BIOS writers are clueless and report the ExtINTA
1714 * I/O APIC input from the cascaded 8259A as the timer
1715 * interrupt input. So just in case, if only one pin
1716 * was found above, try it both directly and through the
1717 * 8259A.
1718 */
1719 if (pin1 == -1) {
1720 pin1 = pin2;
1721 apic1 = apic2;
1722 no_pin1 = 1;
1723 } else if (pin2 == -1) {
1724 pin2 = pin1;
1725 apic2 = apic1;
1726 }
1727
1691 if (pin1 != -1) { 1728 if (pin1 != -1) {
1692 /* 1729 /*
1693 * Ok, does IRQ0 through the IOAPIC work? 1730 * Ok, does IRQ0 through the IOAPIC work?
1694 */ 1731 */
1732 if (no_pin1) {
1733 add_pin_to_irq(0, apic1, pin1);
1734 setup_timer_IRQ0_pin(apic1, pin1, cfg->vector);
1735 }
1695 unmask_IO_APIC_irq(0); 1736 unmask_IO_APIC_irq(0);
1696 if (!no_timer_check && timer_irq_works()) { 1737 if (!no_timer_check && timer_irq_works()) {
1697 nmi_watchdog_default();
1698 if (nmi_watchdog == NMI_IO_APIC) { 1738 if (nmi_watchdog == NMI_IO_APIC) {
1699 disable_8259A_irq(0);
1700 setup_nmi(); 1739 setup_nmi();
1701 enable_8259A_irq(0); 1740 enable_8259A_irq(0);
1702 } 1741 }
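The new pin1/pin2 normalization means a single discovered pin is probed both directly and through the 8259A; no_pin1 records that pin1 was synthesized so the "8254 timer not connected" message can be suppressed and the routing entry set up on the fly. The normalization in isolation (the apic1/apic2 indices are copied the same way):

static void normalize_timer_pins(int *pin1, int *pin2, int *no_pin1)
{
        if (*pin1 == -1) {
                *pin1 = *pin2;          /* try the cascade pin directly */
                *no_pin1 = 1;
        } else if (*pin2 == -1) {
                *pin2 = *pin1;          /* try the timer pin via the 8259A */
        }
}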
@@ -1705,43 +1744,48 @@ static inline void __init check_timer(void)
1705 goto out; 1744 goto out;
1706 } 1745 }
1707 clear_IO_APIC_pin(apic1, pin1); 1746 clear_IO_APIC_pin(apic1, pin1);
1708 apic_printk(APIC_QUIET,KERN_ERR "..MP-BIOS bug: 8254 timer not " 1747 if (!no_pin1)
1709 "connected to IO-APIC\n"); 1748 apic_printk(APIC_QUIET,KERN_ERR "..MP-BIOS bug: "
1710 } 1749 "8254 timer not connected to IO-APIC\n");
1711 1750
1712 apic_printk(APIC_VERBOSE,KERN_INFO "...trying to set up timer (IRQ0) " 1751 apic_printk(APIC_VERBOSE,KERN_INFO
1713 "through the 8259A ... "); 1752 "...trying to set up timer (IRQ0) "
1714 if (pin2 != -1) { 1753 "through the 8259A ... ");
1715 apic_printk(APIC_VERBOSE,"\n..... (found apic %d pin %d) ...", 1754 apic_printk(APIC_VERBOSE,"\n..... (found apic %d pin %d) ...",
1716 apic2, pin2); 1755 apic2, pin2);
1717 /* 1756 /*
1718 * legacy devices should be connected to IO APIC #0 1757 * legacy devices should be connected to IO APIC #0
1719 */ 1758 */
1720 setup_ExtINT_IRQ0_pin(apic2, pin2, cfg->vector); 1759 replace_pin_at_irq(0, apic1, pin1, apic2, pin2);
1760 setup_timer_IRQ0_pin(apic2, pin2, cfg->vector);
1761 unmask_IO_APIC_irq(0);
1762 enable_8259A_irq(0);
1721 if (timer_irq_works()) { 1763 if (timer_irq_works()) {
1722 apic_printk(APIC_VERBOSE," works.\n"); 1764 apic_printk(APIC_VERBOSE," works.\n");
1723 nmi_watchdog_default(); 1765 timer_through_8259 = 1;
1724 if (nmi_watchdog == NMI_IO_APIC) { 1766 if (nmi_watchdog == NMI_IO_APIC) {
1767 disable_8259A_irq(0);
1725 setup_nmi(); 1768 setup_nmi();
1769 enable_8259A_irq(0);
1726 } 1770 }
1727 goto out; 1771 goto out;
1728 } 1772 }
1729 /* 1773 /*
1730 * Cleanup, just in case ... 1774 * Cleanup, just in case ...
1731 */ 1775 */
1776 disable_8259A_irq(0);
1732 clear_IO_APIC_pin(apic2, pin2); 1777 clear_IO_APIC_pin(apic2, pin2);
1778 apic_printk(APIC_VERBOSE," failed.\n");
1733 } 1779 }
1734 apic_printk(APIC_VERBOSE," failed.\n");
1735 1780
1736 if (nmi_watchdog == NMI_IO_APIC) { 1781 if (nmi_watchdog == NMI_IO_APIC) {
1737 printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n"); 1782 printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n");
1738 nmi_watchdog = 0; 1783 nmi_watchdog = NMI_NONE;
1739 } 1784 }
1740 1785
1741 apic_printk(APIC_VERBOSE, KERN_INFO "...trying to set up timer as Virtual Wire IRQ..."); 1786 apic_printk(APIC_VERBOSE, KERN_INFO "...trying to set up timer as Virtual Wire IRQ...");
1742 1787
1743 disable_8259A_irq(0); 1788 lapic_register_intr(0);
1744 irq_desc[0].chip = &lapic_irq_type;
1745 apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */ 1789 apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */
1746 enable_8259A_irq(0); 1790 enable_8259A_irq(0);
1747 1791
@@ -1749,6 +1793,7 @@ static inline void __init check_timer(void)
1749 apic_printk(APIC_VERBOSE," works.\n"); 1793 apic_printk(APIC_VERBOSE," works.\n");
1750 goto out; 1794 goto out;
1751 } 1795 }
1796 disable_8259A_irq(0);
1752 apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector); 1797 apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector);
1753 apic_printk(APIC_VERBOSE," failed.\n"); 1798 apic_printk(APIC_VERBOSE," failed.\n");
1754 1799
@@ -1778,11 +1823,21 @@ static int __init notimercheck(char *s)
1778__setup("no_timer_check", notimercheck); 1823__setup("no_timer_check", notimercheck);
1779 1824
1780/* 1825/*
1781 * 1826 * Traditionally ISA IRQ2 is the cascade IRQ, and is not available
1782 * IRQs that are handled by the PIC in the MPS IOAPIC case. 1827 * to devices. However there may be an I/O APIC pin available for
1783 * - IRQ2 is the cascade IRQ, and cannot be a io-apic IRQ. 1828 * this interrupt regardless. The pin may be left unconnected, but
1784 * Linux doesn't really care, as it's not actually used 1829 * typically it will be reused as an ExtINT cascade interrupt for
1785 * for any interrupt handling anyway. 1830 * the master 8259A. In the MPS case such a pin will normally be
1831 * reported as an ExtINT interrupt in the MP table. With ACPI
1832 * there is no provision for ExtINT interrupts, and in the absence
1833 * of an override it would be treated as an ordinary ISA I/O APIC
1834 * interrupt, that is edge-triggered and unmasked by default. We
1835 * used to do this, but it caused problems on some systems because
1836 * of the NMI watchdog and sometimes IRQ0 of the 8254 timer using
1837 * the same ExtINT cascade interrupt to drive the local APIC of the
1838 * bootstrap processor. Therefore we refrain from routing IRQ2 to
1839 * the I/O APIC in all cases now. No actual device should request
1840 * it anyway. --macro
1786 */ 1841 */
1787#define PIC_IRQS (1<<2) 1842#define PIC_IRQS (1<<2)
1788 1843
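With the acpi_ioapic special case gone (see the hunk below), IRQ2 is unconditionally reserved for the PIC cascade, so ~PIC_IRQS clears exactly bit 2 of io_apic_irqs. A two-assert check of the mask:

#include <assert.h>

#define PIC_IRQS (1 << 2)       /* only the cascade IRQ stays with the PIC */

int main(void)
{
        unsigned int io_apic_irqs = ~PIC_IRQS;

        assert(!(io_apic_irqs & (1 << 2)));  /* IRQ2 never routed via IO-APIC */
        assert(io_apic_irqs & (1 << 0));     /* the timer IRQ still is */
        return 0;
}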
@@ -1793,10 +1848,7 @@ void __init setup_IO_APIC(void)
1793 * calling enable_IO_APIC() is moved to setup_local_APIC for BP 1848 * calling enable_IO_APIC() is moved to setup_local_APIC for BP
1794 */ 1849 */
1795 1850
1796 if (acpi_ioapic) 1851 io_apic_irqs = ~PIC_IRQS;
1797 io_apic_irqs = ~0; /* all IRQs go through IOAPIC */
1798 else
1799 io_apic_irqs = ~PIC_IRQS;
1800 1852
1801 apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n"); 1853 apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n");
1802 1854
@@ -1841,8 +1893,8 @@ static int ioapic_resume(struct sys_device *dev)
1841 1893
1842 spin_lock_irqsave(&ioapic_lock, flags); 1894 spin_lock_irqsave(&ioapic_lock, flags);
1843 reg_00.raw = io_apic_read(dev->id, 0); 1895 reg_00.raw = io_apic_read(dev->id, 0);
1844 if (reg_00.bits.ID != mp_ioapics[dev->id].mpc_apicid) { 1896 if (reg_00.bits.ID != mp_ioapics[dev->id].mp_apicid) {
1845 reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid; 1897 reg_00.bits.ID = mp_ioapics[dev->id].mp_apicid;
1846 io_apic_write(dev->id, 0, reg_00.raw); 1898 io_apic_write(dev->id, 0, reg_00.raw);
1847 } 1899 }
1848 spin_unlock_irqrestore(&ioapic_lock, flags); 1900 spin_unlock_irqrestore(&ioapic_lock, flags);
@@ -2242,8 +2294,8 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
2242 return -1; 2294 return -1;
2243 2295
2244 for (i = 0; i < mp_irq_entries; i++) 2296 for (i = 0; i < mp_irq_entries; i++)
2245 if (mp_irqs[i].mpc_irqtype == mp_INT && 2297 if (mp_irqs[i].mp_irqtype == mp_INT &&
2246 mp_irqs[i].mpc_srcbusirq == bus_irq) 2298 mp_irqs[i].mp_srcbusirq == bus_irq)
2247 break; 2299 break;
2248 if (i >= mp_irq_entries) 2300 if (i >= mp_irq_entries)
2249 return -1; 2301 return -1;
@@ -2336,7 +2388,7 @@ void __init ioapic_init_mappings(void)
2336 ioapic_res = ioapic_setup_resources(); 2388 ioapic_res = ioapic_setup_resources();
2337 for (i = 0; i < nr_ioapics; i++) { 2389 for (i = 0; i < nr_ioapics; i++) {
2338 if (smp_found_config) { 2390 if (smp_found_config) {
2339 ioapic_phys = mp_ioapics[i].mpc_apicaddr; 2391 ioapic_phys = mp_ioapics[i].mp_apicaddr;
2340 } else { 2392 } else {
2341 ioapic_phys = (unsigned long) 2393 ioapic_phys = (unsigned long)
2342 alloc_bootmem_pages(PAGE_SIZE); 2394 alloc_bootmem_pages(PAGE_SIZE);
diff --git a/arch/x86/kernel/ipi.c b/arch/x86/kernel/ipi.c
index c0df7b89ca23..9d98cda39ad9 100644
--- a/arch/x86/kernel/ipi.c
+++ b/arch/x86/kernel/ipi.c
@@ -8,7 +8,6 @@
8#include <linux/kernel_stat.h> 8#include <linux/kernel_stat.h>
9#include <linux/mc146818rtc.h> 9#include <linux/mc146818rtc.h>
10#include <linux/cache.h> 10#include <linux/cache.h>
11#include <linux/interrupt.h>
12#include <linux/cpu.h> 11#include <linux/cpu.h>
13#include <linux/module.h> 12#include <linux/module.h>
14 13
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 147352df28b9..47a6f6f12478 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -48,6 +48,29 @@ void ack_bad_irq(unsigned int irq)
48#endif 48#endif
49} 49}
50 50
51#ifdef CONFIG_DEBUG_STACKOVERFLOW
52/* Debugging check for stack overflow: is there less than 1KB free? */
53static int check_stack_overflow(void)
54{
55 long sp;
56
57 __asm__ __volatile__("andl %%esp,%0" :
58 "=r" (sp) : "0" (THREAD_SIZE - 1));
59
60 return sp < (sizeof(struct thread_info) + STACK_WARN);
61}
62
63static void print_stack_overflow(void)
64{
65 printk(KERN_WARNING "low stack detected by irq handler\n");
66 dump_stack();
67}
68
69#else
70static inline int check_stack_overflow(void) { return 0; }
71static inline void print_stack_overflow(void) { }
72#endif
73
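check_stack_overflow() relies on the stack being THREAD_SIZE-aligned: masking %esp with THREAD_SIZE - 1 yields the offset into the stack, i.e. the bytes still free below the stack pointer. The same arithmetic in plain C, with illustrative sizes:

#include <stdint.h>
#include <stdio.h>

#define THREAD_SIZE  8192u      /* illustrative: stack size, power of two */
#define INFO_SIZE      64u      /* illustrative: struct thread_info footprint */
#define STACK_WARN   1024u      /* warn when less than 1KB remains */

static int stack_overflowing(uintptr_t sp)
{
        /* The stack is THREAD_SIZE-aligned, so this is the free space. */
        return (sp & (THREAD_SIZE - 1)) < (INFO_SIZE + STACK_WARN);
}

int main(void)
{
        printf("%d\n", stack_overflowing(0x1000e000));  /* at the very bottom */
        printf("%d\n", stack_overflowing(0x1000ff00));  /* plenty left */
        return 0;
}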
51#ifdef CONFIG_4KSTACKS 74#ifdef CONFIG_4KSTACKS
52/* 75/*
53 * per-CPU IRQ handling contexts (thread information and stack) 76 * per-CPU IRQ handling contexts (thread information and stack)
@@ -59,48 +82,29 @@ union irq_ctx {
59 82
60static union irq_ctx *hardirq_ctx[NR_CPUS] __read_mostly; 83static union irq_ctx *hardirq_ctx[NR_CPUS] __read_mostly;
61static union irq_ctx *softirq_ctx[NR_CPUS] __read_mostly; 84static union irq_ctx *softirq_ctx[NR_CPUS] __read_mostly;
62#endif
63 85
64/* 86static char softirq_stack[NR_CPUS * THREAD_SIZE]
65 * do_IRQ handles all normal device IRQ's (the special 87 __attribute__((__section__(".bss.page_aligned")));
66 * SMP cross-CPU interrupts have their own specific
67 * handlers).
68 */
69unsigned int do_IRQ(struct pt_regs *regs)
70{
71 struct pt_regs *old_regs;
72 /* high bit used in ret_from_ code */
73 int irq = ~regs->orig_ax;
74 struct irq_desc *desc = irq_desc + irq;
75#ifdef CONFIG_4KSTACKS
76 union irq_ctx *curctx, *irqctx;
77 u32 *isp;
78#endif
79 88
80 if (unlikely((unsigned)irq >= NR_IRQS)) { 89static char hardirq_stack[NR_CPUS * THREAD_SIZE]
81 printk(KERN_EMERG "%s: cannot handle IRQ %d\n", 90 __attribute__((__section__(".bss.page_aligned")));
82 __func__, irq);
83 BUG();
84 }
85 91
86 old_regs = set_irq_regs(regs); 92static void call_on_stack(void *func, void *stack)
87 irq_enter(); 93{
88#ifdef CONFIG_DEBUG_STACKOVERFLOW 94 asm volatile("xchgl %%ebx,%%esp \n"
89 /* Debugging check for stack overflow: is there less than 1KB free? */ 95 "call *%%edi \n"
90 { 96 "movl %%ebx,%%esp \n"
91 long sp; 97 : "=b" (stack)
92 98 : "0" (stack),
93 __asm__ __volatile__("andl %%esp,%0" : 99 "D"(func)
94 "=r" (sp) : "0" (THREAD_SIZE - 1)); 100 : "memory", "cc", "edx", "ecx", "eax");
95 if (unlikely(sp < (sizeof(struct thread_info) + STACK_WARN))) { 101}
96 printk("do_IRQ: stack overflow: %ld\n",
97 sp - sizeof(struct thread_info));
98 dump_stack();
99 }
100 }
101#endif
102 102
103#ifdef CONFIG_4KSTACKS 103static inline int
104execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq)
105{
106 union irq_ctx *curctx, *irqctx;
107 u32 *isp, arg1, arg2;
104 108
105 curctx = (union irq_ctx *) current_thread_info(); 109 curctx = (union irq_ctx *) current_thread_info();
106 irqctx = hardirq_ctx[smp_processor_id()]; 110 irqctx = hardirq_ctx[smp_processor_id()];
@@ -111,52 +115,39 @@ unsigned int do_IRQ(struct pt_regs *regs)
111 * handler) we can't do that and just have to keep using the 115 * handler) we can't do that and just have to keep using the
112 * current stack (which is the irq stack already after all) 116 * current stack (which is the irq stack already after all)
113 */ 117 */
114 if (curctx != irqctx) { 118 if (unlikely(curctx == irqctx))
115 int arg1, arg2, bx; 119 return 0;
116
117 /* build the stack frame on the IRQ stack */
118 isp = (u32*) ((char*)irqctx + sizeof(*irqctx));
119 irqctx->tinfo.task = curctx->tinfo.task;
120 irqctx->tinfo.previous_esp = current_stack_pointer;
121 120
122 /* 121 /* build the stack frame on the IRQ stack */
123 * Copy the softirq bits in preempt_count so that the 122 isp = (u32 *) ((char*)irqctx + sizeof(*irqctx));
124 * softirq checks work in the hardirq context. 123 irqctx->tinfo.task = curctx->tinfo.task;
125 */ 124 irqctx->tinfo.previous_esp = current_stack_pointer;
126 irqctx->tinfo.preempt_count =
127 (irqctx->tinfo.preempt_count & ~SOFTIRQ_MASK) |
128 (curctx->tinfo.preempt_count & SOFTIRQ_MASK);
129
130 asm volatile(
131 " xchgl %%ebx,%%esp \n"
132 " call *%%edi \n"
133 " movl %%ebx,%%esp \n"
134 : "=a" (arg1), "=d" (arg2), "=b" (bx)
135 : "0" (irq), "1" (desc), "2" (isp),
136 "D" (desc->handle_irq)
137 : "memory", "cc", "ecx"
138 );
139 } else
140#endif
141 desc->handle_irq(irq, desc);
142 125
143 irq_exit(); 126 /*
144 set_irq_regs(old_regs); 127 * Copy the softirq bits in preempt_count so that the
128 * softirq checks work in the hardirq context.
129 */
130 irqctx->tinfo.preempt_count =
131 (irqctx->tinfo.preempt_count & ~SOFTIRQ_MASK) |
132 (curctx->tinfo.preempt_count & SOFTIRQ_MASK);
133
134 if (unlikely(overflow))
135 call_on_stack(print_stack_overflow, isp);
136
137 asm volatile("xchgl %%ebx,%%esp \n"
138 "call *%%edi \n"
139 "movl %%ebx,%%esp \n"
140 : "=a" (arg1), "=d" (arg2), "=b" (isp)
141 : "0" (irq), "1" (desc), "2" (isp),
142 "D" (desc->handle_irq)
143 : "memory", "cc", "ecx");
145 return 1; 144 return 1;
146} 145}
147 146
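Before switching to the hardirq stack, execute_on_irq_stack() copies the softirq bits of the interrupted task's preempt_count into the IRQ context so the softirq checks keep working there. The bit merge in isolation, with an illustrative SOFTIRQ_MASK position:

#include <stdint.h>

#define SOFTIRQ_MASK 0x0000ff00u        /* illustrative bit position */

static uint32_t merge_preempt_count(uint32_t irqctx_pc, uint32_t curctx_pc)
{
        /* Keep the IRQ context's own bits, import only the softirq bits. */
        return (irqctx_pc & ~SOFTIRQ_MASK) | (curctx_pc & SOFTIRQ_MASK);
}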
148#ifdef CONFIG_4KSTACKS
149
150static char softirq_stack[NR_CPUS * THREAD_SIZE]
151 __attribute__((__section__(".bss.page_aligned")));
152
153static char hardirq_stack[NR_CPUS * THREAD_SIZE]
154 __attribute__((__section__(".bss.page_aligned")));
155
156/* 147/*
157 * allocate per-cpu stacks for hardirq and for softirq processing 148 * allocate per-cpu stacks for hardirq and for softirq processing
158 */ 149 */
159void irq_ctx_init(int cpu) 150void __cpuinit irq_ctx_init(int cpu)
160{ 151{
161 union irq_ctx *irqctx; 152 union irq_ctx *irqctx;
162 153
@@ -164,25 +155,25 @@ void irq_ctx_init(int cpu)
164 return; 155 return;
165 156
166 irqctx = (union irq_ctx*) &hardirq_stack[cpu*THREAD_SIZE]; 157 irqctx = (union irq_ctx*) &hardirq_stack[cpu*THREAD_SIZE];
167 irqctx->tinfo.task = NULL; 158 irqctx->tinfo.task = NULL;
168 irqctx->tinfo.exec_domain = NULL; 159 irqctx->tinfo.exec_domain = NULL;
169 irqctx->tinfo.cpu = cpu; 160 irqctx->tinfo.cpu = cpu;
170 irqctx->tinfo.preempt_count = HARDIRQ_OFFSET; 161 irqctx->tinfo.preempt_count = HARDIRQ_OFFSET;
171 irqctx->tinfo.addr_limit = MAKE_MM_SEG(0); 162 irqctx->tinfo.addr_limit = MAKE_MM_SEG(0);
172 163
173 hardirq_ctx[cpu] = irqctx; 164 hardirq_ctx[cpu] = irqctx;
174 165
175 irqctx = (union irq_ctx*) &softirq_stack[cpu*THREAD_SIZE]; 166 irqctx = (union irq_ctx*) &softirq_stack[cpu*THREAD_SIZE];
176 irqctx->tinfo.task = NULL; 167 irqctx->tinfo.task = NULL;
177 irqctx->tinfo.exec_domain = NULL; 168 irqctx->tinfo.exec_domain = NULL;
178 irqctx->tinfo.cpu = cpu; 169 irqctx->tinfo.cpu = cpu;
179 irqctx->tinfo.preempt_count = 0; 170 irqctx->tinfo.preempt_count = 0;
180 irqctx->tinfo.addr_limit = MAKE_MM_SEG(0); 171 irqctx->tinfo.addr_limit = MAKE_MM_SEG(0);
181 172
182 softirq_ctx[cpu] = irqctx; 173 softirq_ctx[cpu] = irqctx;
183 174
184 printk("CPU %u irqstacks, hard=%p soft=%p\n", 175 printk(KERN_DEBUG "CPU %u irqstacks, hard=%p soft=%p\n",
185 cpu,hardirq_ctx[cpu],softirq_ctx[cpu]); 176 cpu,hardirq_ctx[cpu],softirq_ctx[cpu]);
186} 177}
187 178
188void irq_ctx_exit(int cpu) 179void irq_ctx_exit(int cpu)
@@ -211,25 +202,56 @@ asmlinkage void do_softirq(void)
211 /* build the stack frame on the softirq stack */ 202 /* build the stack frame on the softirq stack */
212 isp = (u32*) ((char*)irqctx + sizeof(*irqctx)); 203 isp = (u32*) ((char*)irqctx + sizeof(*irqctx));
213 204
214 asm volatile( 205 call_on_stack(__do_softirq, isp);
215 " xchgl %%ebx,%%esp \n"
216 " call __do_softirq \n"
217 " movl %%ebx,%%esp \n"
218 : "=b"(isp)
219 : "0"(isp)
220 : "memory", "cc", "edx", "ecx", "eax"
221 );
222 /* 206 /*
223 * Shouldn't happen, we returned above if in_interrupt(): 207 * Shouldn't happen, we returned above if in_interrupt():
224 */ 208 */
225 WARN_ON_ONCE(softirq_count()); 209 WARN_ON_ONCE(softirq_count());
226 } 210 }
227 211
228 local_irq_restore(flags); 212 local_irq_restore(flags);
229} 213}
214
215#else
216static inline int
217execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq) { return 0; }
230#endif 218#endif
231 219
232/* 220/*
221 * do_IRQ handles all normal device IRQ's (the special
222 * SMP cross-CPU interrupts have their own specific
223 * handlers).
224 */
225unsigned int do_IRQ(struct pt_regs *regs)
226{
227 struct pt_regs *old_regs;
228 /* high bit used in ret_from_ code */
229 int overflow, irq = ~regs->orig_ax;
230 struct irq_desc *desc = irq_desc + irq;
231
232 if (unlikely((unsigned)irq >= NR_IRQS)) {
233 printk(KERN_EMERG "%s: cannot handle IRQ %d\n",
234 __func__, irq);
235 BUG();
236 }
237
238 old_regs = set_irq_regs(regs);
239 irq_enter();
240
241 overflow = check_stack_overflow();
242
243 if (!execute_on_irq_stack(overflow, desc, irq)) {
244 if (unlikely(overflow))
245 print_stack_overflow();
246 desc->handle_irq(irq, desc);
247 }
248
249 irq_exit();
250 set_irq_regs(old_regs);
251 return 1;
252}
253
254/*
233 * Interrupt statistics: 255 * Interrupt statistics:
234 */ 256 */
235 257
@@ -313,16 +335,20 @@ skip:
313 per_cpu(irq_stat,j).irq_tlb_count); 335 per_cpu(irq_stat,j).irq_tlb_count);
314 seq_printf(p, " TLB shootdowns\n"); 336 seq_printf(p, " TLB shootdowns\n");
315#endif 337#endif
338#ifdef CONFIG_X86_MCE
316 seq_printf(p, "TRM: "); 339 seq_printf(p, "TRM: ");
317 for_each_online_cpu(j) 340 for_each_online_cpu(j)
318 seq_printf(p, "%10u ", 341 seq_printf(p, "%10u ",
319 per_cpu(irq_stat,j).irq_thermal_count); 342 per_cpu(irq_stat,j).irq_thermal_count);
320 seq_printf(p, " Thermal event interrupts\n"); 343 seq_printf(p, " Thermal event interrupts\n");
344#endif
345#ifdef CONFIG_X86_LOCAL_APIC
321 seq_printf(p, "SPU: "); 346 seq_printf(p, "SPU: ");
322 for_each_online_cpu(j) 347 for_each_online_cpu(j)
323 seq_printf(p, "%10u ", 348 seq_printf(p, "%10u ",
324 per_cpu(irq_stat,j).irq_spurious_count); 349 per_cpu(irq_stat,j).irq_spurious_count);
325 seq_printf(p, " Spurious interrupts\n"); 350 seq_printf(p, " Spurious interrupts\n");
351#endif
326 seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); 352 seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
327#if defined(CONFIG_X86_IO_APIC) 353#if defined(CONFIG_X86_IO_APIC)
328 seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count)); 354 seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count));
@@ -331,6 +357,40 @@ skip:
331 return 0; 357 return 0;
332} 358}
333 359
360/*
361 * /proc/stat helpers
362 */
363u64 arch_irq_stat_cpu(unsigned int cpu)
364{
365 u64 sum = nmi_count(cpu);
366
367#ifdef CONFIG_X86_LOCAL_APIC
368 sum += per_cpu(irq_stat, cpu).apic_timer_irqs;
369#endif
370#ifdef CONFIG_SMP
371 sum += per_cpu(irq_stat, cpu).irq_resched_count;
372 sum += per_cpu(irq_stat, cpu).irq_call_count;
373 sum += per_cpu(irq_stat, cpu).irq_tlb_count;
374#endif
375#ifdef CONFIG_X86_MCE
376 sum += per_cpu(irq_stat, cpu).irq_thermal_count;
377#endif
378#ifdef CONFIG_X86_LOCAL_APIC
379 sum += per_cpu(irq_stat, cpu).irq_spurious_count;
380#endif
381 return sum;
382}
383
384u64 arch_irq_stat(void)
385{
386 u64 sum = atomic_read(&irq_err_count);
387
388#ifdef CONFIG_X86_IO_APIC
389 sum += atomic_read(&irq_mis_count);
390#endif
391 return sum;
392}
393
334#ifdef CONFIG_HOTPLUG_CPU 394#ifdef CONFIG_HOTPLUG_CPU
335#include <mach_apic.h> 395#include <mach_apic.h>
336 396
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 3aac15466a91..1f78b238d8d2 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -135,6 +135,7 @@ skip:
135 seq_printf(p, "%10u ", cpu_pda(j)->irq_tlb_count); 135 seq_printf(p, "%10u ", cpu_pda(j)->irq_tlb_count);
136 seq_printf(p, " TLB shootdowns\n"); 136 seq_printf(p, " TLB shootdowns\n");
137#endif 137#endif
138#ifdef CONFIG_X86_MCE
138 seq_printf(p, "TRM: "); 139 seq_printf(p, "TRM: ");
139 for_each_online_cpu(j) 140 for_each_online_cpu(j)
140 seq_printf(p, "%10u ", cpu_pda(j)->irq_thermal_count); 141 seq_printf(p, "%10u ", cpu_pda(j)->irq_thermal_count);
@@ -143,6 +144,7 @@ skip:
143 for_each_online_cpu(j) 144 for_each_online_cpu(j)
144 seq_printf(p, "%10u ", cpu_pda(j)->irq_threshold_count); 145 seq_printf(p, "%10u ", cpu_pda(j)->irq_threshold_count);
145 seq_printf(p, " Threshold APIC interrupts\n"); 146 seq_printf(p, " Threshold APIC interrupts\n");
147#endif
146 seq_printf(p, "SPU: "); 148 seq_printf(p, "SPU: ");
147 for_each_online_cpu(j) 149 for_each_online_cpu(j)
148 seq_printf(p, "%10u ", cpu_pda(j)->irq_spurious_count); 150 seq_printf(p, "%10u ", cpu_pda(j)->irq_spurious_count);
@@ -153,6 +155,32 @@ skip:
153} 155}
154 156
155/* 157/*
158 * /proc/stat helpers
159 */
160u64 arch_irq_stat_cpu(unsigned int cpu)
161{
162 u64 sum = cpu_pda(cpu)->__nmi_count;
163
164 sum += cpu_pda(cpu)->apic_timer_irqs;
165#ifdef CONFIG_SMP
166 sum += cpu_pda(cpu)->irq_resched_count;
167 sum += cpu_pda(cpu)->irq_call_count;
168 sum += cpu_pda(cpu)->irq_tlb_count;
169#endif
170#ifdef CONFIG_X86_MCE
171 sum += cpu_pda(cpu)->irq_thermal_count;
172 sum += cpu_pda(cpu)->irq_threshold_count;
173#endif
174 sum += cpu_pda(cpu)->irq_spurious_count;
175 return sum;
176}
177
178u64 arch_irq_stat(void)
179{
180 return atomic_read(&irq_err_count);
181}
182
183/*
156 * do_IRQ handles all normal device IRQ's (the special 184 * do_IRQ handles all normal device IRQ's (the special
157 * SMP cross-CPU interrupts have their own specific 185 * SMP cross-CPU interrupts have their own specific
158 * handlers). 186 * handlers).
diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c
new file mode 100644
index 000000000000..d66914287ee1
--- /dev/null
+++ b/arch/x86/kernel/irqinit_32.c
@@ -0,0 +1,114 @@
1#include <linux/errno.h>
2#include <linux/signal.h>
3#include <linux/sched.h>
4#include <linux/ioport.h>
5#include <linux/interrupt.h>
6#include <linux/slab.h>
7#include <linux/random.h>
8#include <linux/init.h>
9#include <linux/kernel_stat.h>
10#include <linux/sysdev.h>
11#include <linux/bitops.h>
12
13#include <asm/atomic.h>
14#include <asm/system.h>
15#include <asm/io.h>
16#include <asm/timer.h>
17#include <asm/pgtable.h>
18#include <asm/delay.h>
19#include <asm/desc.h>
20#include <asm/apic.h>
21#include <asm/arch_hooks.h>
22#include <asm/i8259.h>
23
24
25
26/*
27 * Note that on a 486, we don't want to do a SIGFPE on an irq13
28 * as the irq is unreliable, and exception 16 works correctly
29 * (i.e. as explained in the Intel literature). On a 386, you
30 * can't use exception 16 due to bad IBM design, so we have to
31 * rely on the less exact irq13.
32 *
33 * Careful.. Not only is IRQ13 unreliable, but it also
34 * leads to races. IBM designers who came up with it should
35 * be shot.
36 */
37
38
39static irqreturn_t math_error_irq(int cpl, void *dev_id)
40{
41 extern void math_error(void __user *);
42 outb(0,0xF0);
43 if (ignore_fpu_irq || !boot_cpu_data.hard_math)
44 return IRQ_NONE;
45 math_error((void __user *)get_irq_regs()->ip);
46 return IRQ_HANDLED;
47}
48
49/*
50 * New motherboards sometimes make IRQ 13 be a PCI interrupt,
51 * so allow interrupt sharing.
52 */
53static struct irqaction fpu_irq = {
54 .handler = math_error_irq,
55 .mask = CPU_MASK_NONE,
56 .name = "fpu",
57};
58
59void __init init_ISA_irqs (void)
60{
61 int i;
62
63#ifdef CONFIG_X86_LOCAL_APIC
64 init_bsp_APIC();
65#endif
66 init_8259A(0);
67
68 /*
69 * 16 old-style INTA-cycle interrupts:
70 */
71 for (i = 0; i < 16; i++) {
72 set_irq_chip_and_handler_name(i, &i8259A_chip,
73 handle_level_irq, "XT");
74 }
75}
76
77/* Overridden in paravirt.c */
78void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ")));
79
80void __init native_init_IRQ(void)
81{
82 int i;
83
84 /* all the set up before the call gates are initialised */
85 pre_intr_init_hook();
86
87 /*
88 * Cover the whole vector space, no vector can escape
89 * us. (some of these will be overridden and become
90 * 'special' SMP interrupts)
91 */
92 for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) {
93 int vector = FIRST_EXTERNAL_VECTOR + i;
94 if (i >= NR_IRQS)
95 break;
96 /* SYSCALL_VECTOR was reserved in trap_init. */
97 if (!test_bit(vector, used_vectors))
98 set_intr_gate(vector, interrupt[i]);
99 }
100
101 /* setup after call gates are initialised (usually add in
102 * the architecture specific gates)
103 */
104 intr_init_hook();
105
106 /*
107 * External FPU? Set up irq13 if so, for
108 * original braindamaged IBM FERR coupling.
109 */
110 if (boot_cpu_data.hard_math && !cpu_has_fpu)
111 setup_irq(FPU_IRQ, &fpu_irq);
112
113 irq_ctx_init(smp_processor_id());
114}
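The vector loop in native_init_IRQ() above installs a gate for every external vector but skips any bit already claimed in used_vectors (trap_init() reserves SYSCALL_VECTOR this way). A rough user-space simulation of that skip logic, with the bitmap reduced to a plain byte array for readability:

	#include <stdio.h>

	#define FIRST_EXTERNAL_VECTOR	0x20
	#define NR_VECTORS		256

	static unsigned char used_vectors[NR_VECTORS];	/* 1 = reserved earlier */
	static int installed[NR_VECTORS];

	static void set_intr_gate(int vector) { installed[vector] = 1; }

	int main(void)
	{
		int i;

		used_vectors[0x80] = 1;	/* pretend trap_init() claimed 0x80 */

		/* Same shape as the loop above: walk every external vector
		 * and install a stub unless it was reserved. */
		for (i = 0; i < NR_VECTORS - FIRST_EXTERNAL_VECTOR; i++) {
			int vector = FIRST_EXTERNAL_VECTOR + i;

			if (!used_vectors[vector])
				set_intr_gate(vector);
		}

		printf("0x80 installed? %d (expected 0)\n", installed[0x80]);
		printf("0x81 installed? %d (expected 1)\n", installed[0x81]);
		return 0;
	}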
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c
new file mode 100644
index 000000000000..31f49e8f46a7
--- /dev/null
+++ b/arch/x86/kernel/irqinit_64.c
@@ -0,0 +1,217 @@
1#include <linux/linkage.h>
2#include <linux/errno.h>
3#include <linux/signal.h>
4#include <linux/sched.h>
5#include <linux/ioport.h>
6#include <linux/interrupt.h>
7#include <linux/timex.h>
8#include <linux/slab.h>
9#include <linux/random.h>
10#include <linux/init.h>
11#include <linux/kernel_stat.h>
12#include <linux/sysdev.h>
13#include <linux/bitops.h>
14
15#include <asm/acpi.h>
16#include <asm/atomic.h>
17#include <asm/system.h>
18#include <asm/io.h>
19#include <asm/hw_irq.h>
20#include <asm/pgtable.h>
21#include <asm/delay.h>
22#include <asm/desc.h>
23#include <asm/apic.h>
24#include <asm/i8259.h>
25
26/*
27 * Common place to define all x86 IRQ vectors
28 *
29 * This builds up the IRQ handler stubs using some ugly macros in irq.h
30 *
31 * These macros create the low-level assembly IRQ routines that save
32 * register context and call do_IRQ(). do_IRQ() then does all the
33 * operations that are needed to keep the AT (or SMP IOAPIC)
34 * interrupt-controller happy.
35 */
36
37#define IRQ_NAME2(nr) nr##_interrupt(void)
38#define IRQ_NAME(nr) IRQ_NAME2(IRQ##nr)
39
40/*
41 * SMP has a few special interrupts for IPI messages
42 */
43
44#define BUILD_IRQ(nr) \
45 asmlinkage void IRQ_NAME(nr); \
46 asm("\n.p2align\n" \
47 "IRQ" #nr "_interrupt:\n\t" \
48 "push $~(" #nr ") ; " \
49 "jmp common_interrupt");
50
51#define BI(x,y) \
52 BUILD_IRQ(x##y)
53
54#define BUILD_16_IRQS(x) \
55 BI(x,0) BI(x,1) BI(x,2) BI(x,3) \
56 BI(x,4) BI(x,5) BI(x,6) BI(x,7) \
57 BI(x,8) BI(x,9) BI(x,a) BI(x,b) \
58 BI(x,c) BI(x,d) BI(x,e) BI(x,f)
59
60/*
61 * ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts:
62 * (these are usually mapped to vectors 0x30-0x3f)
63 */
64
65/*
66 * The IO-APIC gives us many more interrupt sources. Most of these
67 * are unused but an SMP system is supposed to have enough memory ...
68 * sometimes (mostly wrt. hw bugs) we get corrupted vectors all
69 * across the spectrum, so we really want to be prepared to get all
70 * of these. Plus, more powerful systems might have more than 64
71 * IO-APIC registers.
72 *
73 * (these are usually mapped into the 0x30-0xff vector range)
74 */
75 BUILD_16_IRQS(0x2) BUILD_16_IRQS(0x3)
76BUILD_16_IRQS(0x4) BUILD_16_IRQS(0x5) BUILD_16_IRQS(0x6) BUILD_16_IRQS(0x7)
77BUILD_16_IRQS(0x8) BUILD_16_IRQS(0x9) BUILD_16_IRQS(0xa) BUILD_16_IRQS(0xb)
78BUILD_16_IRQS(0xc) BUILD_16_IRQS(0xd) BUILD_16_IRQS(0xe) BUILD_16_IRQS(0xf)
79
80#undef BUILD_16_IRQS
81#undef BI
82
83
84#define IRQ(x,y) \
85 IRQ##x##y##_interrupt
86
87#define IRQLIST_16(x) \
88 IRQ(x,0), IRQ(x,1), IRQ(x,2), IRQ(x,3), \
89 IRQ(x,4), IRQ(x,5), IRQ(x,6), IRQ(x,7), \
90 IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \
91 IRQ(x,c), IRQ(x,d), IRQ(x,e), IRQ(x,f)
92
93/* for the irq vectors */
94static void (*__initdata interrupt[NR_VECTORS - FIRST_EXTERNAL_VECTOR])(void) = {
95 IRQLIST_16(0x2), IRQLIST_16(0x3),
96 IRQLIST_16(0x4), IRQLIST_16(0x5), IRQLIST_16(0x6), IRQLIST_16(0x7),
97 IRQLIST_16(0x8), IRQLIST_16(0x9), IRQLIST_16(0xa), IRQLIST_16(0xb),
98 IRQLIST_16(0xc), IRQLIST_16(0xd), IRQLIST_16(0xe), IRQLIST_16(0xf)
99};
100
101#undef IRQ
102#undef IRQLIST_16
103
104
105
106
107/*
108 * IRQ2 is cascade interrupt to second interrupt controller
109 */
110
111static struct irqaction irq2 = {
112 .handler = no_action,
113 .mask = CPU_MASK_NONE,
114 .name = "cascade",
115};
116DEFINE_PER_CPU(vector_irq_t, vector_irq) = {
117 [0 ... IRQ0_VECTOR - 1] = -1,
118 [IRQ0_VECTOR] = 0,
119 [IRQ1_VECTOR] = 1,
120 [IRQ2_VECTOR] = 2,
121 [IRQ3_VECTOR] = 3,
122 [IRQ4_VECTOR] = 4,
123 [IRQ5_VECTOR] = 5,
124 [IRQ6_VECTOR] = 6,
125 [IRQ7_VECTOR] = 7,
126 [IRQ8_VECTOR] = 8,
127 [IRQ9_VECTOR] = 9,
128 [IRQ10_VECTOR] = 10,
129 [IRQ11_VECTOR] = 11,
130 [IRQ12_VECTOR] = 12,
131 [IRQ13_VECTOR] = 13,
132 [IRQ14_VECTOR] = 14,
133 [IRQ15_VECTOR] = 15,
134 [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1
135};
136
137static void __init init_ISA_irqs (void)
138{
139 int i;
140
141 init_bsp_APIC();
142 init_8259A(0);
143
144 for (i = 0; i < NR_IRQS; i++) {
145 irq_desc[i].status = IRQ_DISABLED;
146 irq_desc[i].action = NULL;
147 irq_desc[i].depth = 1;
148
149 if (i < 16) {
150 /*
151 * 16 old-style INTA-cycle interrupts:
152 */
153 set_irq_chip_and_handler_name(i, &i8259A_chip,
154 handle_level_irq, "XT");
155 } else {
156 /*
157 * 'high' PCI IRQs filled in on demand
158 */
159 irq_desc[i].chip = &no_irq_chip;
160 }
161 }
162}
163
164void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ")));
165
166void __init native_init_IRQ(void)
167{
168 int i;
169
170 init_ISA_irqs();
171 /*
172 * Cover the whole vector space, no vector can escape
173 * us. (some of these will be overridden and become
174 * 'special' SMP interrupts)
175 */
176 for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) {
177 int vector = FIRST_EXTERNAL_VECTOR + i;
178 if (vector != IA32_SYSCALL_VECTOR)
179 set_intr_gate(vector, interrupt[i]);
180 }
181
182#ifdef CONFIG_SMP
183 /*
184 * The reschedule interrupt is a CPU-to-CPU reschedule-helper
185 * IPI, driven by wakeup.
186 */
187 alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
188
189 /* IPIs for invalidation */
190 alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+0, invalidate_interrupt0);
191 alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+1, invalidate_interrupt1);
192 alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+2, invalidate_interrupt2);
193 alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+3, invalidate_interrupt3);
194 alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+4, invalidate_interrupt4);
195 alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+5, invalidate_interrupt5);
196 alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+6, invalidate_interrupt6);
197 alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+7, invalidate_interrupt7);
198
199 /* IPI for generic function call */
200 alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
201
202 /* Low priority IPI to cleanup after moving an irq */
203 set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
204#endif
205 alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
206 alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt);
207
208 /* self generated IPI for local APIC timer */
209 alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
210
211 /* IPI vectors for APIC spurious and error interrupts */
212 alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
213 alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
214
215 if (!acpi_ioapic)
216 setup_irq(2, &irq2);
217}
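irqinit_64.c builds all of its entry stubs with token-pasting macros: BUILD_IRQ() emits one stub per vector, BI()/BUILD_16_IRQS() stamp them out sixteen at a time, and IRQLIST_16() collects their addresses into the interrupt[] table. A compilable miniature of the same technique, with four stubs instead of sixteen and printf standing in for the push/jmp assembly:

	#include <stdio.h>

	/* Each "stub" reports its vector instead of pushing it and
	 * jumping to common_interrupt; 0x##nr pastes into a hex literal. */
	#define BUILD_IRQ(nr) \
		static void irq_##nr##_stub(void) { printf("stub 0x%x\n", 0x##nr); }

	#define BI(x, y)	BUILD_IRQ(x##y)
	#define BUILD_4_IRQS(x)	BI(x,0) BI(x,1) BI(x,2) BI(x,3)

	BUILD_4_IRQS(2)		/* defines irq_20_stub .. irq_23_stub */

	#define IRQ(x, y)	irq_##x##y##_stub
	#define IRQLIST_4(x)	IRQ(x,0), IRQ(x,1), IRQ(x,2), IRQ(x,3)

	static void (*interrupt[])(void) = { IRQLIST_4(2) };

	int main(void)
	{
		unsigned int i;

		for (i = 0; i < sizeof(interrupt) / sizeof(interrupt[0]); i++)
			interrupt[i]();
		return 0;
	}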
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index 0224c3637c73..21f2bae98c15 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -20,9 +20,9 @@
20#include <asm/mmu_context.h> 20#include <asm/mmu_context.h>
21 21
22#ifdef CONFIG_SMP 22#ifdef CONFIG_SMP
23static void flush_ldt(void *null) 23static void flush_ldt(void *current_mm)
24{ 24{
25 if (current->active_mm) 25 if (current->active_mm == current_mm)
26 load_LDT(&current->active_mm->context); 26 load_LDT(&current->active_mm->context);
27} 27}
28#endif 28#endif
@@ -68,7 +68,7 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
68 load_LDT(pc); 68 load_LDT(pc);
69 mask = cpumask_of_cpu(smp_processor_id()); 69 mask = cpumask_of_cpu(smp_processor_id());
70 if (!cpus_equal(current->mm->cpu_vm_mask, mask)) 70 if (!cpus_equal(current->mm->cpu_vm_mask, mask))
71 smp_call_function(flush_ldt, NULL, 1, 1); 71 smp_call_function(flush_ldt, current->mm, 1, 1);
72 preempt_enable(); 72 preempt_enable();
73#else 73#else
74 load_LDT(pc); 74 load_LDT(pc);
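The flush_ldt() change above is a behavioural fix: the old IPI handler reloaded the LDT on any CPU with an active mm, while the new one receives the mm whose LDT actually changed and reloads only where that mm is in use. A toy model of the difference (struct mm, active_mm and load_LDT() are simplified stand-ins, not the kernel's definitions):

	#include <stdio.h>

	struct mm { int id; };

	static struct mm *active_mm;	/* what this "CPU" is running */
	static int reloads;

	static void load_LDT(struct mm *mm) { (void)mm; reloads++; }

	/* New-style handler: reload only if this CPU is running the mm
	 * whose LDT was modified. */
	static void flush_ldt(void *changed_mm)
	{
		if (active_mm == changed_mm)
			load_LDT(active_mm);
	}

	int main(void)
	{
		struct mm a = { 1 }, b = { 2 };

		active_mm = &a;
		flush_ldt(&b);	/* different mm: skipped */
		flush_ldt(&a);	/* matching mm: reloaded */
		printf("reloads: %d (expected 1)\n", reloads);
		return 0;
	}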
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index d0b234c9fc31..f4960171bc66 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -39,7 +39,7 @@ static void set_idt(void *newidt, __u16 limit)
39 curidt.address = (unsigned long)newidt; 39 curidt.address = (unsigned long)newidt;
40 40
41 load_idt(&curidt); 41 load_idt(&curidt);
42}; 42}
43 43
44 44
45static void set_gdt(void *newgdt, __u16 limit) 45static void set_gdt(void *newgdt, __u16 limit)
@@ -51,7 +51,7 @@ static void set_gdt(void *newgdt, __u16 limit)
51 curgdt.address = (unsigned long)newgdt; 51 curgdt.address = (unsigned long)newgdt;
52 52
53 load_gdt(&curgdt); 53 load_gdt(&curgdt);
54}; 54}
55 55
56static void load_segments(void) 56static void load_segments(void)
57{ 57{
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 576a03db4511..7830dc4a8380 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -110,7 +110,7 @@ static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
110{ 110{
111 pgd_t *level4p; 111 pgd_t *level4p;
112 level4p = (pgd_t *)__va(start_pgtable); 112 level4p = (pgd_t *)__va(start_pgtable);
113 return init_level4_page(image, level4p, 0, end_pfn << PAGE_SHIFT); 113 return init_level4_page(image, level4p, 0, max_pfn << PAGE_SHIFT);
114} 114}
115 115
116static void set_idt(void *newidt, u16 limit) 116static void set_idt(void *newidt, u16 limit)
diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c
index 69729e38b78a..9758fea87c5b 100644
--- a/arch/x86/kernel/microcode.c
+++ b/arch/x86/kernel/microcode.c
@@ -5,13 +5,14 @@
5 * 2006 Shaohua Li <shaohua.li@intel.com> 5 * 2006 Shaohua Li <shaohua.li@intel.com>
6 * 6 *
7 * This driver allows to upgrade microcode on Intel processors 7 * This driver allows to upgrade microcode on Intel processors
8 * belonging to IA-32 family - PentiumPro, Pentium II, 8 * belonging to IA-32 family - PentiumPro, Pentium II,
9 * Pentium III, Xeon, Pentium 4, etc. 9 * Pentium III, Xeon, Pentium 4, etc.
10 * 10 *
11 * Reference: Section 8.10 of Volume III, Intel Pentium 4 Manual, 11 * Reference: Section 8.11 of Volume 3a, IA-32 Intel® Architecture
12 * Order Number 245472 or free download from: 12 * Software Developer's Manual
13 * 13 * Order Number 253668 or free download from:
14 * http://developer.intel.com/design/pentium4/manuals/245472.htm 14 *
15 * http://developer.intel.com/design/pentium4/manuals/253668.htm
15 * 16 *
16 * For more information, go to http://www.urbanmyth.org/microcode 17 * For more information, go to http://www.urbanmyth.org/microcode
17 * 18 *
@@ -58,12 +59,12 @@
58 * nature of implementation. 59 * nature of implementation.
59 * 1.11 22 Mar 2002 Tigran Aivazian <tigran@veritas.com> 60 * 1.11 22 Mar 2002 Tigran Aivazian <tigran@veritas.com>
60 * Fix the panic when writing zero-length microcode chunk. 61 * Fix the panic when writing zero-length microcode chunk.
61 * 1.12 29 Sep 2003 Nitin Kamble <nitin.a.kamble@intel.com>, 62 * 1.12 29 Sep 2003 Nitin Kamble <nitin.a.kamble@intel.com>,
62 * Jun Nakajima <jun.nakajima@intel.com> 63 * Jun Nakajima <jun.nakajima@intel.com>
63 * Support for the microcode updates in the new format. 64 * Support for the microcode updates in the new format.
64 * 1.13 10 Oct 2003 Tigran Aivazian <tigran@veritas.com> 65 * 1.13 10 Oct 2003 Tigran Aivazian <tigran@veritas.com>
65 * Removed ->read() method and obsoleted MICROCODE_IOCFREE ioctl 66 * Removed ->read() method and obsoleted MICROCODE_IOCFREE ioctl
66 * because we no longer hold a copy of applied microcode 67 * because we no longer hold a copy of applied microcode
67 * in kernel memory. 68 * in kernel memory.
68 * 1.14 25 Jun 2004 Tigran Aivazian <tigran@veritas.com> 69 * 1.14 25 Jun 2004 Tigran Aivazian <tigran@veritas.com>
69 * Fix sigmatch() macro to handle old CPUs with pf == 0. 70 * Fix sigmatch() macro to handle old CPUs with pf == 0.
@@ -320,11 +321,11 @@ static void apply_microcode(int cpu)
320 return; 321 return;
321 322
322 /* serialize access to the physical write to MSR 0x79 */ 323 /* serialize access to the physical write to MSR 0x79 */
323 spin_lock_irqsave(&microcode_update_lock, flags); 324 spin_lock_irqsave(&microcode_update_lock, flags);
324 325
325 /* write microcode via MSR 0x79 */ 326 /* write microcode via MSR 0x79 */
326 wrmsr(MSR_IA32_UCODE_WRITE, 327 wrmsr(MSR_IA32_UCODE_WRITE,
327 (unsigned long) uci->mc->bits, 328 (unsigned long) uci->mc->bits,
328 (unsigned long) uci->mc->bits >> 16 >> 16); 329 (unsigned long) uci->mc->bits >> 16 >> 16);
329 wrmsr(MSR_IA32_UCODE_REV, 0, 0); 330 wrmsr(MSR_IA32_UCODE_REV, 0, 0);
330 331
@@ -341,7 +342,7 @@ static void apply_microcode(int cpu)
341 return; 342 return;
342 } 343 }
343 printk(KERN_INFO "microcode: CPU%d updated from revision " 344 printk(KERN_INFO "microcode: CPU%d updated from revision "
344 "0x%x to 0x%x, date = %08x \n", 345 "0x%x to 0x%x, date = %08x \n",
345 cpu_num, uci->rev, val[1], uci->mc->hdr.date); 346 cpu_num, uci->rev, val[1], uci->mc->hdr.date);
346 uci->rev = val[1]; 347 uci->rev = val[1];
347} 348}
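The ">> 16 >> 16" in the wrmsr() call above is deliberate: on 32-bit builds unsigned long is 32 bits wide, so a single ">> 32" would be undefined behaviour, while shifting twice by 16 yields 0 there and the true upper half on 64-bit builds. A small demonstration of the idiom:

	#include <stdio.h>

	int main(void)
	{
		unsigned long addr = (unsigned long)&addr;

		/* Split a pointer-sized value into the two 32-bit MSR
		 * halves without ever shifting by the full type width. */
		unsigned int lo = (unsigned int)addr;
		unsigned int hi = (unsigned int)(addr >> 16 >> 16);

		printf("lo=%#x hi=%#x (sizeof(long)=%zu)\n",
		       lo, hi, sizeof(long));
		return 0;
	}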
@@ -534,7 +535,7 @@ static int cpu_request_microcode(int cpu)
534 c->x86, c->x86_model, c->x86_mask); 535 c->x86, c->x86_model, c->x86_mask);
535 error = request_firmware(&firmware, name, &microcode_pdev->dev); 536 error = request_firmware(&firmware, name, &microcode_pdev->dev);
536 if (error) { 537 if (error) {
537 pr_debug("microcode: ucode data file %s load failed\n", name); 538 pr_debug("microcode: data file %s load failed\n", name);
538 return error; 539 return error;
539 } 540 }
540 buf = firmware->data; 541 buf = firmware->data;
@@ -805,6 +806,9 @@ static int __init microcode_init (void)
805{ 806{
806 int error; 807 int error;
807 808
809 printk(KERN_INFO
810 "IA-32 Microcode Update Driver: v" MICROCODE_VERSION " <tigran@aivazian.fsnet.co.uk>\n");
811
808 error = microcode_dev_init(); 812 error = microcode_dev_init();
809 if (error) 813 if (error)
810 return error; 814 return error;
@@ -825,9 +829,6 @@ static int __init microcode_init (void)
825 } 829 }
826 830
827 register_hotcpu_notifier(&mc_cpu_notifier); 831 register_hotcpu_notifier(&mc_cpu_notifier);
828
829 printk(KERN_INFO
830 "IA-32 Microcode Update Driver: v" MICROCODE_VERSION " <tigran@aivazian.fsnet.co.uk>\n");
831 return 0; 832 return 0;
832} 833}
833 834
diff --git a/arch/x86/kernel/mmconf-fam10h_64.c b/arch/x86/kernel/mmconf-fam10h_64.c
index edc5fbfe85c0..fdfdc550b366 100644
--- a/arch/x86/kernel/mmconf-fam10h_64.c
+++ b/arch/x86/kernel/mmconf-fam10h_64.c
@@ -12,6 +12,7 @@
12#include <asm/io.h> 12#include <asm/io.h>
13#include <asm/msr.h> 13#include <asm/msr.h>
14#include <asm/acpi.h> 14#include <asm/acpi.h>
15#include <asm/mmconfig.h>
15 16
16#include "../pci/pci.h" 17#include "../pci/pci.h"
17 18
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 404683b94e79..3b25e49380c6 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -25,6 +25,8 @@
25#include <asm/proto.h> 25#include <asm/proto.h>
26#include <asm/acpi.h> 26#include <asm/acpi.h>
27#include <asm/bios_ebda.h> 27#include <asm/bios_ebda.h>
28#include <asm/e820.h>
29#include <asm/trampoline.h>
28 30
29#include <mach_apic.h> 31#include <mach_apic.h>
30#ifdef CONFIG_X86_32 32#ifdef CONFIG_X86_32
@@ -32,28 +34,6 @@
32#include <mach_mpparse.h> 34#include <mach_mpparse.h>
33#endif 35#endif
34 36
35/* Have we found an MP table */
36int smp_found_config;
37
38/*
39 * Various Linux-internal data structures created from the
40 * MP-table.
41 */
42#if defined (CONFIG_MCA) || defined (CONFIG_EISA)
43int mp_bus_id_to_type[MAX_MP_BUSSES];
44#endif
45
46DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
47int mp_bus_id_to_pci_bus[MAX_MP_BUSSES] = {[0 ... MAX_MP_BUSSES - 1] = -1 };
48
49static int mp_current_pci_id;
50
51int pic_mode;
52
53/*
54 * Intel MP BIOS table parsing routines:
55 */
56
57/* 37/*
58 * Checksum an MP configuration block. 38 * Checksum an MP configuration block.
59 */ 39 */
@@ -69,15 +49,73 @@ static int __init mpf_checksum(unsigned char *mp, int len)
69} 49}
70 50
71#ifdef CONFIG_X86_NUMAQ 51#ifdef CONFIG_X86_NUMAQ
52int found_numaq;
72/* 53/*
73 * Have to match translation table entries to main table entries by counter 54 * Have to match translation table entries to main table entries by counter
74 * hence the mpc_record variable .... can't see a less disgusting way of 55 * hence the mpc_record variable .... can't see a less disgusting way of
75 * doing this .... 56 * doing this ....
76 */ 57 */
58struct mpc_config_translation {
59 unsigned char mpc_type;
60 unsigned char trans_len;
61 unsigned char trans_type;
62 unsigned char trans_quad;
63 unsigned char trans_global;
64 unsigned char trans_local;
65 unsigned short trans_reserved;
66};
67
77 68
78static int mpc_record; 69static int mpc_record;
79static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY] 70static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY]
80 __cpuinitdata; 71 __cpuinitdata;
72
73static inline int generate_logical_apicid(int quad, int phys_apicid)
74{
75 return (quad << 4) + (phys_apicid ? phys_apicid << 1 : 1);
76}
77
78
79static inline int mpc_apic_id(struct mpc_config_processor *m,
80 struct mpc_config_translation *translation_record)
81{
82 int quad = translation_record->trans_quad;
83 int logical_apicid = generate_logical_apicid(quad, m->mpc_apicid);
84
85 printk(KERN_DEBUG "Processor #%d %u:%u APIC version %d (quad %d, apic %d)\n",
86 m->mpc_apicid,
87 (m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8,
88 (m->mpc_cpufeature & CPU_MODEL_MASK) >> 4,
89 m->mpc_apicver, quad, logical_apicid);
90 return logical_apicid;
91}
92
93int mp_bus_id_to_node[MAX_MP_BUSSES];
94
95int mp_bus_id_to_local[MAX_MP_BUSSES];
96
97static void mpc_oem_bus_info(struct mpc_config_bus *m, char *name,
98 struct mpc_config_translation *translation)
99{
100 int quad = translation->trans_quad;
101 int local = translation->trans_local;
102
103 mp_bus_id_to_node[m->mpc_busid] = quad;
104 mp_bus_id_to_local[m->mpc_busid] = local;
105 printk(KERN_INFO "Bus #%d is %s (node %d)\n",
106 m->mpc_busid, name, quad);
107}
108
109int quad_local_to_mp_bus_id[NR_CPUS/4][4];
110static void mpc_oem_pci_bus(struct mpc_config_bus *m,
111 struct mpc_config_translation *translation)
112{
113 int quad = translation->trans_quad;
114 int local = translation->trans_local;
115
116 quad_local_to_mp_bus_id[quad][local] = m->mpc_busid;
117}
118
81#endif 119#endif
82 120
83static void __cpuinit MP_processor_info(struct mpc_config_processor *m) 121static void __cpuinit MP_processor_info(struct mpc_config_processor *m)
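The NUMA-Q helper above packs the quad number into the high nibble of the logical APIC id and the doubled physical id into the low nibble, with 1 standing in for physical id 0. A quick check of that encoding:

	#include <stdio.h>

	/* Same packing as generate_logical_apicid() above. */
	static int logical_apicid(int quad, int phys)
	{
		return (quad << 4) + (phys ? phys << 1 : 1);
	}

	int main(void)
	{
		printf("quad 0, cpu 0 -> %#x\n", logical_apicid(0, 0)); /* 0x1  */
		printf("quad 1, cpu 2 -> %#x\n", logical_apicid(1, 2)); /* 0x14 */
		printf("quad 3, cpu 3 -> %#x\n", logical_apicid(3, 3)); /* 0x36 */
		return 0;
	}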
@@ -90,7 +128,10 @@ static void __cpuinit MP_processor_info(struct mpc_config_processor *m)
90 return; 128 return;
91 } 129 }
92#ifdef CONFIG_X86_NUMAQ 130#ifdef CONFIG_X86_NUMAQ
93 apicid = mpc_apic_id(m, translation_table[mpc_record]); 131 if (found_numaq)
132 apicid = mpc_apic_id(m, translation_table[mpc_record]);
133 else
134 apicid = m->mpc_apicid;
94#else 135#else
95 apicid = m->mpc_apicid; 136 apicid = m->mpc_apicid;
96#endif 137#endif
@@ -103,17 +144,18 @@ static void __cpuinit MP_processor_info(struct mpc_config_processor *m)
103 generic_processor_info(apicid, m->mpc_apicver); 144 generic_processor_info(apicid, m->mpc_apicver);
104} 145}
105 146
147#ifdef CONFIG_X86_IO_APIC
106static void __init MP_bus_info(struct mpc_config_bus *m) 148static void __init MP_bus_info(struct mpc_config_bus *m)
107{ 149{
108 char str[7]; 150 char str[7];
109
110 memcpy(str, m->mpc_bustype, 6); 151 memcpy(str, m->mpc_bustype, 6);
111 str[6] = 0; 152 str[6] = 0;
112 153
113#ifdef CONFIG_X86_NUMAQ 154#ifdef CONFIG_X86_NUMAQ
114 mpc_oem_bus_info(m, str, translation_table[mpc_record]); 155 if (found_numaq)
156 mpc_oem_bus_info(m, str, translation_table[mpc_record]);
115#else 157#else
116 Dprintk("Bus #%d is %s\n", m->mpc_busid, str); 158 printk(KERN_INFO "Bus #%d is %s\n", m->mpc_busid, str);
117#endif 159#endif
118 160
119#if MAX_MP_BUSSES < 256 161#if MAX_MP_BUSSES < 256
@@ -132,11 +174,10 @@ static void __init MP_bus_info(struct mpc_config_bus *m)
132#endif 174#endif
133 } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI) - 1) == 0) { 175 } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI) - 1) == 0) {
134#ifdef CONFIG_X86_NUMAQ 176#ifdef CONFIG_X86_NUMAQ
135 mpc_oem_pci_bus(m, translation_table[mpc_record]); 177 if (found_numaq)
178 mpc_oem_pci_bus(m, translation_table[mpc_record]);
136#endif 179#endif
137 clear_bit(m->mpc_busid, mp_bus_not_pci); 180 clear_bit(m->mpc_busid, mp_bus_not_pci);
138 mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id;
139 mp_current_pci_id++;
140#if defined(CONFIG_EISA) || defined (CONFIG_MCA) 181#if defined(CONFIG_EISA) || defined (CONFIG_MCA)
141 mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI; 182 mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI;
142 } else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA) - 1) == 0) { 183 } else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA) - 1) == 0) {
@@ -147,6 +188,7 @@ static void __init MP_bus_info(struct mpc_config_bus *m)
147 } else 188 } else
148 printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str); 189 printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str);
149} 190}
191#endif
150 192
151#ifdef CONFIG_X86_IO_APIC 193#ifdef CONFIG_X86_IO_APIC
152 194
@@ -176,18 +218,89 @@ static void __init MP_ioapic_info(struct mpc_config_ioapic *m)
176 if (bad_ioapic(m->mpc_apicaddr)) 218 if (bad_ioapic(m->mpc_apicaddr))
177 return; 219 return;
178 220
179 mp_ioapics[nr_ioapics] = *m; 221 mp_ioapics[nr_ioapics].mp_apicaddr = m->mpc_apicaddr;
222 mp_ioapics[nr_ioapics].mp_apicid = m->mpc_apicid;
223 mp_ioapics[nr_ioapics].mp_type = m->mpc_type;
224 mp_ioapics[nr_ioapics].mp_apicver = m->mpc_apicver;
225 mp_ioapics[nr_ioapics].mp_flags = m->mpc_flags;
180 nr_ioapics++; 226 nr_ioapics++;
181} 227}
182 228
183static void __init MP_intsrc_info(struct mpc_config_intsrc *m) 229static void print_MP_intsrc_info(struct mpc_config_intsrc *m)
184{ 230{
185 mp_irqs[mp_irq_entries] = *m; 231 printk(KERN_CONT "Int: type %d, pol %d, trig %d, bus %02x,"
186 Dprintk("Int: type %d, pol %d, trig %d, bus %d,"
187 " IRQ %02x, APIC ID %x, APIC INT %02x\n", 232 " IRQ %02x, APIC ID %x, APIC INT %02x\n",
188 m->mpc_irqtype, m->mpc_irqflag & 3, 233 m->mpc_irqtype, m->mpc_irqflag & 3,
189 (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus, 234 (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus,
190 m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq); 235 m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq);
236}
237
238static void __init print_mp_irq_info(struct mp_config_intsrc *mp_irq)
239{
240 printk(KERN_CONT "Int: type %d, pol %d, trig %d, bus %02x,"
241 " IRQ %02x, APIC ID %x, APIC INT %02x\n",
242 mp_irq->mp_irqtype, mp_irq->mp_irqflag & 3,
243 (mp_irq->mp_irqflag >> 2) & 3, mp_irq->mp_srcbus,
244 mp_irq->mp_srcbusirq, mp_irq->mp_dstapic, mp_irq->mp_dstirq);
245}
246
247static void __init assign_to_mp_irq(struct mpc_config_intsrc *m,
248 struct mp_config_intsrc *mp_irq)
249{
250 mp_irq->mp_dstapic = m->mpc_dstapic;
251 mp_irq->mp_type = m->mpc_type;
252 mp_irq->mp_irqtype = m->mpc_irqtype;
253 mp_irq->mp_irqflag = m->mpc_irqflag;
254 mp_irq->mp_srcbus = m->mpc_srcbus;
255 mp_irq->mp_srcbusirq = m->mpc_srcbusirq;
256 mp_irq->mp_dstirq = m->mpc_dstirq;
257}
258
259static void __init assign_to_mpc_intsrc(struct mp_config_intsrc *mp_irq,
260 struct mpc_config_intsrc *m)
261{
262 m->mpc_dstapic = mp_irq->mp_dstapic;
263 m->mpc_type = mp_irq->mp_type;
264 m->mpc_irqtype = mp_irq->mp_irqtype;
265 m->mpc_irqflag = mp_irq->mp_irqflag;
266 m->mpc_srcbus = mp_irq->mp_srcbus;
267 m->mpc_srcbusirq = mp_irq->mp_srcbusirq;
268 m->mpc_dstirq = mp_irq->mp_dstirq;
269}
270
271static int __init mp_irq_mpc_intsrc_cmp(struct mp_config_intsrc *mp_irq,
272 struct mpc_config_intsrc *m)
273{
274 if (mp_irq->mp_dstapic != m->mpc_dstapic)
275 return 1;
276 if (mp_irq->mp_type != m->mpc_type)
277 return 2;
278 if (mp_irq->mp_irqtype != m->mpc_irqtype)
279 return 3;
280 if (mp_irq->mp_irqflag != m->mpc_irqflag)
281 return 4;
282 if (mp_irq->mp_srcbus != m->mpc_srcbus)
283 return 5;
284 if (mp_irq->mp_srcbusirq != m->mpc_srcbusirq)
285 return 6;
286 if (mp_irq->mp_dstirq != m->mpc_dstirq)
287 return 7;
288
289 return 0;
290}
291
292static void __init MP_intsrc_info(struct mpc_config_intsrc *m)
293{
294 int i;
295
296 print_MP_intsrc_info(m);
297
298 for (i = 0; i < mp_irq_entries; i++) {
299 if (!mp_irq_mpc_intsrc_cmp(&mp_irqs[i], m))
300 return;
301 }
302
303 assign_to_mp_irq(m, &mp_irqs[mp_irq_entries]);
191 if (++mp_irq_entries == MAX_IRQ_SOURCES) 304 if (++mp_irq_entries == MAX_IRQ_SOURCES)
192 panic("Max # of irq sources exceeded!!\n"); 305 panic("Max # of irq sources exceeded!!\n");
193} 306}
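MP_intsrc_info() now deduplicates before recording: it scans the already-stored mp_irqs[] and appends only when mp_irq_mpc_intsrc_cmp() finds no field-for-field match, and the helper's nonzero return value names the first differing field, which helps when picking apart a broken BIOS table. A self-contained sketch of the record-unless-duplicate pattern (the struct is simplified, not the kernel's):

	#include <stdio.h>

	struct intsrc {
		int type, irqtype, irqflag, srcbus, srcbusirq, dstapic, dstirq;
	};

	static struct intsrc table[16];
	static int entries;

	/* 0 means "identical entry already recorded"; otherwise the
	 * return value identifies the first field that differs. */
	static int intsrc_cmp(const struct intsrc *a, const struct intsrc *b)
	{
		if (a->dstapic != b->dstapic)		return 1;
		if (a->type != b->type)			return 2;
		if (a->irqtype != b->irqtype)		return 3;
		if (a->irqflag != b->irqflag)		return 4;
		if (a->srcbus != b->srcbus)		return 5;
		if (a->srcbusirq != b->srcbusirq)	return 6;
		if (a->dstirq != b->dstirq)		return 7;
		return 0;
	}

	static void record(const struct intsrc *m)
	{
		int i;

		for (i = 0; i < entries; i++)
			if (!intsrc_cmp(&table[i], m))
				return;		/* duplicate: drop it */
		table[entries++] = *m;
	}

	int main(void)
	{
		struct intsrc a = { 3, 0, 0xf, 0, 1, 2, 1 };

		record(&a);
		record(&a);			/* second copy is ignored */
		printf("entries: %d (expected 1)\n", entries);
		return 0;
	}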
@@ -196,7 +309,7 @@ static void __init MP_intsrc_info(struct mpc_config_intsrc *m)
196 309
197static void __init MP_lintsrc_info(struct mpc_config_lintsrc *m) 310static void __init MP_lintsrc_info(struct mpc_config_lintsrc *m)
198{ 311{
199 Dprintk("Lint: type %d, pol %d, trig %d, bus %d," 312 printk(KERN_INFO "Lint: type %d, pol %d, trig %d, bus %02x,"
200 " IRQ %02x, APIC ID %x, APIC LINT %02x\n", 313 " IRQ %02x, APIC ID %x, APIC LINT %02x\n",
201 m->mpc_irqtype, m->mpc_irqflag & 3, 314 m->mpc_irqtype, m->mpc_irqflag & 3,
202 (m->mpc_irqflag >> 2) & 3, m->mpc_srcbusid, 315 (m->mpc_irqflag >> 2) & 3, m->mpc_srcbusid,
@@ -266,11 +379,14 @@ static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable,
266 } 379 }
267} 380}
268 381
269static inline void mps_oem_check(struct mp_config_table *mpc, char *oem, 382void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem,
270 char *productid) 383 char *productid)
271{ 384{
272 if (strncmp(oem, "IBM NUMA", 8)) 385 if (strncmp(oem, "IBM NUMA", 8))
273 printk("Warning! May not be a NUMA-Q system!\n"); 386 printk("Warning! Not a NUMA-Q system!\n");
387 else
388 found_numaq = 1;
389
274 if (mpc->mpc_oemptr) 390 if (mpc->mpc_oemptr)
275 smp_read_mpc_oem((struct mp_config_oemtable *)mpc->mpc_oemptr, 391 smp_read_mpc_oem((struct mp_config_oemtable *)mpc->mpc_oemptr,
276 mpc->mpc_oemsize); 392 mpc->mpc_oemsize);
@@ -281,12 +397,9 @@ static inline void mps_oem_check(struct mp_config_table *mpc, char *oem,
281 * Read/parse the MPC 397 * Read/parse the MPC
282 */ 398 */
283 399
284static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early) 400static int __init smp_check_mpc(struct mp_config_table *mpc, char *oem,
401 char *str)
285{ 402{
286 char str[16];
287 char oem[10];
288 int count = sizeof(*mpc);
289 unsigned char *mpt = ((unsigned char *)mpc) + count;
290 403
291 if (memcmp(mpc->mpc_signature, MPC_SIGNATURE, 4)) { 404 if (memcmp(mpc->mpc_signature, MPC_SIGNATURE, 4)) {
292 printk(KERN_ERR "MPTABLE: bad signature [%c%c%c%c]!\n", 405 printk(KERN_ERR "MPTABLE: bad signature [%c%c%c%c]!\n",
@@ -309,19 +422,42 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early)
309 } 422 }
310 memcpy(oem, mpc->mpc_oem, 8); 423 memcpy(oem, mpc->mpc_oem, 8);
311 oem[8] = 0; 424 oem[8] = 0;
312 printk(KERN_INFO "MPTABLE: OEM ID: %s ", oem); 425 printk(KERN_INFO "MPTABLE: OEM ID: %s\n", oem);
313 426
314 memcpy(str, mpc->mpc_productid, 12); 427 memcpy(str, mpc->mpc_productid, 12);
315 str[12] = 0; 428 str[12] = 0;
316 printk("Product ID: %s ", str);
317 429
318#ifdef CONFIG_X86_32 430 printk(KERN_INFO "MPTABLE: Product ID: %s\n", str);
319 mps_oem_check(mpc, oem, str);
320#endif
321 printk(KERN_INFO "MPTABLE: Product ID: %s ", str);
322 431
323 printk(KERN_INFO "MPTABLE: APIC at: 0x%X\n", mpc->mpc_lapic); 432 printk(KERN_INFO "MPTABLE: APIC at: 0x%X\n", mpc->mpc_lapic);
324 433
434 return 1;
435}
436
437static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early)
438{
439 char str[16];
440 char oem[10];
441
442 int count = sizeof(*mpc);
443 unsigned char *mpt = ((unsigned char *)mpc) + count;
444
445 if (!smp_check_mpc(mpc, oem, str))
446 return 0;
447
448#ifdef CONFIG_X86_32
449 /*
450 * need to make sure summit and es7000's mps_oem_check is safe to be
451 * called early via genericarch's mps_oem_check
452 */
453 if (early) {
454#ifdef CONFIG_X86_NUMAQ
455 numaq_mps_oem_check(mpc, oem, str);
456#endif
457 } else
458 mps_oem_check(mpc, oem, str);
459#endif
460
325 /* save the local APIC address, it might be non-default */ 461 /* save the local APIC address, it might be non-default */
326 if (!acpi_lapic) 462 if (!acpi_lapic)
327 mp_lapic_addr = mpc->mpc_lapic; 463 mp_lapic_addr = mpc->mpc_lapic;
@@ -352,7 +488,9 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early)
352 { 488 {
353 struct mpc_config_bus *m = 489 struct mpc_config_bus *m =
354 (struct mpc_config_bus *)mpt; 490 (struct mpc_config_bus *)mpt;
491#ifdef CONFIG_X86_IO_APIC
355 MP_bus_info(m); 492 MP_bus_info(m);
493#endif
356 mpt += sizeof(*m); 494 mpt += sizeof(*m);
357 count += sizeof(*m); 495 count += sizeof(*m);
358 break; 496 break;
@@ -402,6 +540,11 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early)
402 ++mpc_record; 540 ++mpc_record;
403#endif 541#endif
404 } 542 }
543
544#ifdef CONFIG_X86_GENERICARCH
545 generic_bigsmp_probe();
546#endif
547
405 setup_apic_routing(); 548 setup_apic_routing();
406 if (!num_processors) 549 if (!num_processors)
407 printk(KERN_ERR "MPTABLE: no processors registered!\n"); 550 printk(KERN_ERR "MPTABLE: no processors registered!\n");
@@ -427,7 +570,7 @@ static void __init construct_default_ioirq_mptable(int mpc_default_type)
427 intsrc.mpc_type = MP_INTSRC; 570 intsrc.mpc_type = MP_INTSRC;
428 intsrc.mpc_irqflag = 0; /* conforming */ 571 intsrc.mpc_irqflag = 0; /* conforming */
429 intsrc.mpc_srcbus = 0; 572 intsrc.mpc_srcbus = 0;
430 intsrc.mpc_dstapic = mp_ioapics[0].mpc_apicid; 573 intsrc.mpc_dstapic = mp_ioapics[0].mp_apicid;
431 574
432 intsrc.mpc_irqtype = mp_INT; 575 intsrc.mpc_irqtype = mp_INT;
433 576
@@ -488,40 +631,11 @@ static void __init construct_default_ioirq_mptable(int mpc_default_type)
488 MP_intsrc_info(&intsrc); 631 MP_intsrc_info(&intsrc);
489} 632}
490 633
491#endif
492 634
493static inline void __init construct_default_ISA_mptable(int mpc_default_type) 635static void construct_ioapic_table(int mpc_default_type)
494{ 636{
495 struct mpc_config_processor processor;
496 struct mpc_config_bus bus;
497#ifdef CONFIG_X86_IO_APIC
498 struct mpc_config_ioapic ioapic; 637 struct mpc_config_ioapic ioapic;
499#endif 638 struct mpc_config_bus bus;
500 struct mpc_config_lintsrc lintsrc;
501 int linttypes[2] = { mp_ExtINT, mp_NMI };
502 int i;
503
504 /*
505 * local APIC has default address
506 */
507 mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
508
509 /*
510 * 2 CPUs, numbered 0 & 1.
511 */
512 processor.mpc_type = MP_PROCESSOR;
513 /* Either an integrated APIC or a discrete 82489DX. */
514 processor.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
515 processor.mpc_cpuflag = CPU_ENABLED;
516 processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) |
517 (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask;
518 processor.mpc_featureflag = boot_cpu_data.x86_capability[0];
519 processor.mpc_reserved[0] = 0;
520 processor.mpc_reserved[1] = 0;
521 for (i = 0; i < 2; i++) {
522 processor.mpc_apicid = i;
523 MP_processor_info(&processor);
524 }
525 639
526 bus.mpc_type = MP_BUS; 640 bus.mpc_type = MP_BUS;
527 bus.mpc_busid = 0; 641 bus.mpc_busid = 0;
@@ -550,7 +664,6 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type)
550 MP_bus_info(&bus); 664 MP_bus_info(&bus);
551 } 665 }
552 666
553#ifdef CONFIG_X86_IO_APIC
554 ioapic.mpc_type = MP_IOAPIC; 667 ioapic.mpc_type = MP_IOAPIC;
555 ioapic.mpc_apicid = 2; 668 ioapic.mpc_apicid = 2;
556 ioapic.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01; 669 ioapic.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
@@ -562,7 +675,42 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type)
562 * We set up most of the low 16 IO-APIC pins according to MPS rules. 675 * We set up most of the low 16 IO-APIC pins according to MPS rules.
563 */ 676 */
564 construct_default_ioirq_mptable(mpc_default_type); 677 construct_default_ioirq_mptable(mpc_default_type);
678}
679#else
680static inline void construct_ioapic_table(int mpc_default_type) { }
565#endif 681#endif
682
683static inline void __init construct_default_ISA_mptable(int mpc_default_type)
684{
685 struct mpc_config_processor processor;
686 struct mpc_config_lintsrc lintsrc;
687 int linttypes[2] = { mp_ExtINT, mp_NMI };
688 int i;
689
690 /*
691 * local APIC has default address
692 */
693 mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
694
695 /*
696 * 2 CPUs, numbered 0 & 1.
697 */
698 processor.mpc_type = MP_PROCESSOR;
699 /* Either an integrated APIC or a discrete 82489DX. */
700 processor.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
701 processor.mpc_cpuflag = CPU_ENABLED;
702 processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) |
703 (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask;
704 processor.mpc_featureflag = boot_cpu_data.x86_capability[0];
705 processor.mpc_reserved[0] = 0;
706 processor.mpc_reserved[1] = 0;
707 for (i = 0; i < 2; i++) {
708 processor.mpc_apicid = i;
709 MP_processor_info(&processor);
710 }
711
712 construct_ioapic_table(mpc_default_type);
713
566 lintsrc.mpc_type = MP_LINTSRC; 714 lintsrc.mpc_type = MP_LINTSRC;
567 lintsrc.mpc_irqflag = 0; /* conforming */ 715 lintsrc.mpc_irqflag = 0; /* conforming */
568 lintsrc.mpc_srcbusid = 0; 716 lintsrc.mpc_srcbusid = 0;
@@ -578,12 +726,22 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type)
578static struct intel_mp_floating *mpf_found; 726static struct intel_mp_floating *mpf_found;
579 727
580/* 728/*
729 * Machine specific quirk for finding the SMP config before other setup
730 * activities destroy the table:
731 */
732int (*mach_get_smp_config_quirk)(unsigned int early);
733
734/*
581 * Scan the memory blocks for an SMP configuration block. 735 * Scan the memory blocks for an SMP configuration block.
582 */ 736 */
583static void __init __get_smp_config(unsigned early) 737static void __init __get_smp_config(unsigned int early)
584{ 738{
585 struct intel_mp_floating *mpf = mpf_found; 739 struct intel_mp_floating *mpf = mpf_found;
586 740
741 if (mach_get_smp_config_quirk) {
742 if (mach_get_smp_config_quirk(early))
743 return;
744 }
587 if (acpi_lapic && early) 745 if (acpi_lapic && early)
588 return; 746 return;
589 /* 747 /*
@@ -600,7 +758,7 @@ static void __init __get_smp_config(unsigned early)
600 758
601 printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n", 759 printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n",
602 mpf->mpf_specification); 760 mpf->mpf_specification);
603#ifdef CONFIG_X86_32 761#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32)
604 if (mpf->mpf_feature2 & (1 << 7)) { 762 if (mpf->mpf_feature2 & (1 << 7)) {
605 printk(KERN_INFO " IMCR and PIC compatibility mode.\n"); 763 printk(KERN_INFO " IMCR and PIC compatibility mode.\n");
606 pic_mode = 1; 764 pic_mode = 1;
@@ -632,7 +790,9 @@ static void __init __get_smp_config(unsigned early)
632 * override the defaults. 790 * override the defaults.
633 */ 791 */
634 if (!smp_read_mpc(phys_to_virt(mpf->mpf_physptr), early)) { 792 if (!smp_read_mpc(phys_to_virt(mpf->mpf_physptr), early)) {
793#ifdef CONFIG_X86_LOCAL_APIC
635 smp_found_config = 0; 794 smp_found_config = 0;
795#endif
636 printk(KERN_ERR 796 printk(KERN_ERR
637 "BIOS bug, MP table errors detected!...\n"); 797 "BIOS bug, MP table errors detected!...\n");
638 printk(KERN_ERR "... disabling SMP support. " 798 printk(KERN_ERR "... disabling SMP support. "
@@ -689,7 +849,7 @@ static int __init smp_scan_config(unsigned long base, unsigned long length,
689 unsigned int *bp = phys_to_virt(base); 849 unsigned int *bp = phys_to_virt(base);
690 struct intel_mp_floating *mpf; 850 struct intel_mp_floating *mpf;
691 851
692 Dprintk("Scan SMP from %p for %ld bytes.\n", bp, length); 852 printk(KERN_DEBUG "Scan SMP from %p for %ld bytes.\n", bp, length);
693 BUILD_BUG_ON(sizeof(*mpf) != 16); 853 BUILD_BUG_ON(sizeof(*mpf) != 16);
694 854
695 while (length > 0) { 855 while (length > 0) {
@@ -699,15 +859,21 @@ static int __init smp_scan_config(unsigned long base, unsigned long length,
699 !mpf_checksum((unsigned char *)bp, 16) && 859 !mpf_checksum((unsigned char *)bp, 16) &&
700 ((mpf->mpf_specification == 1) 860 ((mpf->mpf_specification == 1)
701 || (mpf->mpf_specification == 4))) { 861 || (mpf->mpf_specification == 4))) {
702 862#ifdef CONFIG_X86_LOCAL_APIC
703 smp_found_config = 1; 863 smp_found_config = 1;
864#endif
704 mpf_found = mpf; 865 mpf_found = mpf;
705#ifdef CONFIG_X86_32 866
706 printk(KERN_INFO "found SMP MP-table at [%p] %08lx\n", 867 printk(KERN_INFO "found SMP MP-table at [%p] %08lx\n",
707 mpf, virt_to_phys(mpf)); 868 mpf, virt_to_phys(mpf));
708 reserve_bootmem(virt_to_phys(mpf), PAGE_SIZE, 869
870 if (!reserve)
871 return 1;
872 reserve_bootmem_generic(virt_to_phys(mpf), PAGE_SIZE,
709 BOOTMEM_DEFAULT); 873 BOOTMEM_DEFAULT);
710 if (mpf->mpf_physptr) { 874 if (mpf->mpf_physptr) {
875 unsigned long size = PAGE_SIZE;
876#ifdef CONFIG_X86_32
711 /* 877 /*
712 * We cannot access the MPC table to compute 878
713 * table size yet, as only a few megabytes from 879
@@ -717,24 +883,15 @@ static int __init smp_scan_config(unsigned long base, unsigned long length,
717 * PAGE_SIZE from mpf->mpf_physptr yields BUG() 883
718 * in reserve_bootmem. 884 * in reserve_bootmem.
719 */ 885 */
720 unsigned long size = PAGE_SIZE;
721 unsigned long end = max_low_pfn * PAGE_SIZE; 886 unsigned long end = max_low_pfn * PAGE_SIZE;
722 if (mpf->mpf_physptr + size > end) 887 if (mpf->mpf_physptr + size > end)
723 size = end - mpf->mpf_physptr; 888 size = end - mpf->mpf_physptr;
724 reserve_bootmem(mpf->mpf_physptr, size, 889#endif
890 reserve_bootmem_generic(mpf->mpf_physptr, size,
725 BOOTMEM_DEFAULT); 891 BOOTMEM_DEFAULT);
726 } 892 }
727 893
728#else 894 return 1;
729 if (!reserve)
730 return 1;
731
732 reserve_bootmem_generic(virt_to_phys(mpf), PAGE_SIZE);
733 if (mpf->mpf_physptr)
734 reserve_bootmem_generic(mpf->mpf_physptr,
735 PAGE_SIZE);
736#endif
737 return 1;
738 } 895 }
739 bp += 4; 896 bp += 4;
740 length -= 16; 897 length -= 16;
@@ -742,10 +899,16 @@ static int __init smp_scan_config(unsigned long base, unsigned long length,
742 return 0; 899 return 0;
743} 900}
744 901
745static void __init __find_smp_config(unsigned reserve) 902int (*mach_find_smp_config_quirk)(unsigned int reserve);
903
904static void __init __find_smp_config(unsigned int reserve)
746{ 905{
747 unsigned int address; 906 unsigned int address;
748 907
908 if (mach_find_smp_config_quirk) {
909 if (mach_find_smp_config_quirk(reserve))
910 return;
911 }
749 /* 912 /*
750 * FIXME: Linux assumes you have 640K of base ram.. 913 * FIXME: Linux assumes you have 640K of base ram..
751 * this continues the error... 914 * this continues the error...
@@ -790,298 +953,294 @@ void __init find_smp_config(void)
790 __find_smp_config(1); 953 __find_smp_config(1);
791} 954}
792 955
793/* -------------------------------------------------------------------------- 956#ifdef CONFIG_X86_IO_APIC
794 ACPI-based MP Configuration 957static u8 __initdata irq_used[MAX_IRQ_SOURCES];
795 -------------------------------------------------------------------------- */
796 958
797/* 959static int __init get_MP_intsrc_index(struct mpc_config_intsrc *m)
798 * Keep this outside and initialized to 0, for !CONFIG_ACPI builds: 960{
799 */ 961 int i;
800int es7000_plat;
801 962
802#ifdef CONFIG_ACPI 963 if (m->mpc_irqtype != mp_INT)
964 return 0;
803 965
804#ifdef CONFIG_X86_IO_APIC 966 if (m->mpc_irqflag != 0x0f)
967 return 0;
805 968
806#define MP_ISA_BUS 0 969 /* not legacy */
807 970
808extern struct mp_ioapic_routing mp_ioapic_routing[MAX_IO_APICS]; 971 for (i = 0; i < mp_irq_entries; i++) {
972 if (mp_irqs[i].mp_irqtype != mp_INT)
973 continue;
809 974
810static int mp_find_ioapic(int gsi) 975 if (mp_irqs[i].mp_irqflag != 0x0f)
811{ 976 continue;
812 int i = 0;
813 977
814 /* Find the IOAPIC that manages this GSI. */ 978 if (mp_irqs[i].mp_srcbus != m->mpc_srcbus)
815 for (i = 0; i < nr_ioapics; i++) { 979 continue;
816 if ((gsi >= mp_ioapic_routing[i].gsi_base) 980 if (mp_irqs[i].mp_srcbusirq != m->mpc_srcbusirq)
817 && (gsi <= mp_ioapic_routing[i].gsi_end)) 981 continue;
818 return i; 982 if (irq_used[i]) {
983 /* already claimed */
984 return -2;
985 }
986 irq_used[i] = 1;
987 return i;
819 } 988 }
820 989
821 printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi); 990 /* not found */
822 return -1; 991 return -1;
823} 992}
824 993
825static u8 __init uniq_ioapic_id(u8 id) 994#define SPARE_SLOT_NUM 20
826{ 995
827#ifdef CONFIG_X86_32 996static struct mpc_config_intsrc __initdata *m_spare[SPARE_SLOT_NUM];
828 if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
829 !APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
830 return io_apic_get_unique_id(nr_ioapics, id);
831 else
832 return id;
833#else
834 int i;
835 DECLARE_BITMAP(used, 256);
836 bitmap_zero(used, 256);
837 for (i = 0; i < nr_ioapics; i++) {
838 struct mpc_config_ioapic *ia = &mp_ioapics[i];
839 __set_bit(ia->mpc_apicid, used);
840 }
841 if (!test_bit(id, used))
842 return id;
843 return find_first_zero_bit(used, 256);
844#endif 997#endif
845}
846 998
847void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) 999static int __init replace_intsrc_all(struct mp_config_table *mpc,
1000 unsigned long mpc_new_phys,
1001 unsigned long mpc_new_length)
848{ 1002{
849 int idx = 0; 1003#ifdef CONFIG_X86_IO_APIC
850 1004 int i;
851 if (bad_ioapic(address)) 1005 int nr_m_spare = 0;
852 return; 1006#endif
853 1007
854 idx = nr_ioapics; 1008 int count = sizeof(*mpc);
1009 unsigned char *mpt = ((unsigned char *)mpc) + count;
855 1010
856 mp_ioapics[idx].mpc_type = MP_IOAPIC; 1011 printk(KERN_INFO "mpc_length %x\n", mpc->mpc_length);
857 mp_ioapics[idx].mpc_flags = MPC_APIC_USABLE; 1012 while (count < mpc->mpc_length) {
858 mp_ioapics[idx].mpc_apicaddr = address; 1013 switch (*mpt) {
1014 case MP_PROCESSOR:
1015 {
1016 struct mpc_config_processor *m =
1017 (struct mpc_config_processor *)mpt;
1018 mpt += sizeof(*m);
1019 count += sizeof(*m);
1020 break;
1021 }
1022 case MP_BUS:
1023 {
1024 struct mpc_config_bus *m =
1025 (struct mpc_config_bus *)mpt;
1026 mpt += sizeof(*m);
1027 count += sizeof(*m);
1028 break;
1029 }
1030 case MP_IOAPIC:
1031 {
1032 mpt += sizeof(struct mpc_config_ioapic);
1033 count += sizeof(struct mpc_config_ioapic);
1034 break;
1035 }
1036 case MP_INTSRC:
1037 {
1038#ifdef CONFIG_X86_IO_APIC
1039 struct mpc_config_intsrc *m =
1040 (struct mpc_config_intsrc *)mpt;
859 1041
860 set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); 1042 printk(KERN_INFO "OLD ");
861 mp_ioapics[idx].mpc_apicid = uniq_ioapic_id(id); 1043 print_MP_intsrc_info(m);
862#ifdef CONFIG_X86_32 1044 i = get_MP_intsrc_index(m);
863 mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx); 1045 if (i > 0) {
864#else 1046 assign_to_mpc_intsrc(&mp_irqs[i], m);
865 mp_ioapics[idx].mpc_apicver = 0; 1047 printk(KERN_INFO "NEW ");
1048 print_mp_irq_info(&mp_irqs[i]);
1049 } else if (!i) {
1050 /* legacy, do nothing */
1051 } else if (nr_m_spare < SPARE_SLOT_NUM) {
1052 /*
1053 * not found (-1) or duplicated (-2)
1054 * entries are invalid; remember the
1055 * slot so it can be reused later
1056 */
1057 m_spare[nr_m_spare] = m;
1058 nr_m_spare++;
1059 }
866#endif 1060#endif
867 /* 1061 mpt += sizeof(struct mpc_config_intsrc);
868 * Build basic GSI lookup table to facilitate gsi->io_apic lookups 1062 count += sizeof(struct mpc_config_intsrc);
869 * and to prevent reprogramming of IOAPIC pins (PCI GSIs). 1063 break;
870 */ 1064 }
871 mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mpc_apicid; 1065 case MP_LINTSRC:
872 mp_ioapic_routing[idx].gsi_base = gsi_base; 1066 {
873 mp_ioapic_routing[idx].gsi_end = gsi_base + 1067 struct mpc_config_lintsrc *m =
874 io_apic_get_redir_entries(idx); 1068 (struct mpc_config_lintsrc *)mpt;
875 1069 mpt += sizeof(*m);
876 printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, " 1070 count += sizeof(*m);
877 "GSI %d-%d\n", idx, mp_ioapics[idx].mpc_apicid, 1071 break;
878 mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr, 1072 }
879 mp_ioapic_routing[idx].gsi_base, mp_ioapic_routing[idx].gsi_end); 1073 default:
880 1074 /* wrong mptable */
881 nr_ioapics++; 1075 printk(KERN_ERR "Your mptable is wrong, contact your HW vendor!\n");
882} 1076 printk(KERN_ERR "type %x\n", *mpt);
1077 print_hex_dump(KERN_ERR, " ", DUMP_PREFIX_ADDRESS, 16,
1078 1, mpc, mpc->mpc_length, 1);
1079 goto out;
1080 }
1081 }
883 1082
884void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi) 1083#ifdef CONFIG_X86_IO_APIC
885{ 1084 for (i = 0; i < mp_irq_entries; i++) {
886 struct mpc_config_intsrc intsrc; 1085 if (irq_used[i])
887 int ioapic = -1; 1086 continue;
888 int pin = -1;
889 1087
890 /* 1088 if (mp_irqs[i].mp_irqtype != mp_INT)
891 * Convert 'gsi' to 'ioapic.pin'. 1089 continue;
892 */
893 ioapic = mp_find_ioapic(gsi);
894 if (ioapic < 0)
895 return;
896 pin = gsi - mp_ioapic_routing[ioapic].gsi_base;
897 1090
898 /* 1091 if (mp_irqs[i].mp_irqflag != 0x0f)
899 * TBD: This check is for faulty timer entries, where the override 1092 continue;
900 * erroneously sets the trigger to level, resulting in a HUGE
901 * increase of timer interrupts!
902 */
903 if ((bus_irq == 0) && (trigger == 3))
904 trigger = 1;
905 1093
906 intsrc.mpc_type = MP_INTSRC; 1094 if (nr_m_spare > 0) {
907 intsrc.mpc_irqtype = mp_INT; 1095 printk(KERN_INFO "*NEW* found ");
908 intsrc.mpc_irqflag = (trigger << 2) | polarity; 1096 nr_m_spare--;
909 intsrc.mpc_srcbus = MP_ISA_BUS; 1097 assign_to_mpc_intsrc(&mp_irqs[i], m_spare[nr_m_spare]);
910 intsrc.mpc_srcbusirq = bus_irq; /* IRQ */ 1098 m_spare[nr_m_spare] = NULL;
911 intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid; /* APIC ID */ 1099 } else {
912 intsrc.mpc_dstirq = pin; /* INTIN# */ 1100 struct mpc_config_intsrc *m =
1101 (struct mpc_config_intsrc *)mpt;
1102 count += sizeof(struct mpc_config_intsrc);
1103 if (!mpc_new_phys) {
1104 printk(KERN_INFO "No spare slots, try to append...take your risk, new mpc_length %x\n", count);
1105 } else {
1106 if (count <= mpc_new_length)
1107 printk(KERN_INFO "No spare slots, try to append..., new mpc_length %x\n", count);
1108 else {
1109 printk(KERN_ERR "mpc_new_length %lx is too small\n", mpc_new_length);
1110 goto out;
1111 }
1112 }
1113 assign_to_mpc_intsrc(&mp_irqs[i], m);
1114 mpc->mpc_length = count;
1115 mpt += sizeof(struct mpc_config_intsrc);
1116 }
1117 print_mp_irq_info(&mp_irqs[i]);
1118 }
1119#endif
1120out:
1121 /* update checksum */
1122 mpc->mpc_checksum = 0;
1123 mpc->mpc_checksum -= mpf_checksum((unsigned char *)mpc,
1124 mpc->mpc_length);
913 1125
914 MP_intsrc_info(&intsrc); 1126 return 0;
915} 1127}
916 1128
917void __init mp_config_acpi_legacy_irqs(void) 1129static int __initdata enable_update_mptable;
918{
919 struct mpc_config_intsrc intsrc;
920 int i = 0;
921 int ioapic = -1;
922 1130
923#if defined (CONFIG_MCA) || defined (CONFIG_EISA) 1131static int __init update_mptable_setup(char *str)
924 /* 1132{
925 * Fabricate the legacy ISA bus (bus #31). 1133 enable_update_mptable = 1;
926 */ 1134 return 0;
927 mp_bus_id_to_type[MP_ISA_BUS] = MP_BUS_ISA; 1135}
928#endif 1136early_param("update_mptable", update_mptable_setup);
929 set_bit(MP_ISA_BUS, mp_bus_not_pci);
930 Dprintk("Bus #%d is ISA\n", MP_ISA_BUS);
931 1137
932 /* 1138static unsigned long __initdata mpc_new_phys;
933 * Older generations of ES7000 have no legacy identity mappings 1139static unsigned long mpc_new_length __initdata = 4096;
934 */
935 if (es7000_plat == 1)
936 return;
937 1140
938 /* 1141/* alloc_mptable or alloc_mptable=4k */
939 * Locate the IOAPIC that manages the ISA IRQs (0-15). 1142static int __initdata alloc_mptable;
940 */ 1143static int __init parse_alloc_mptable_opt(char *p)
941 ioapic = mp_find_ioapic(0); 1144{
942 if (ioapic < 0) 1145 enable_update_mptable = 1;
943 return; 1146 alloc_mptable = 1;
1147 if (!p)
1148 return 0;
1149 mpc_new_length = memparse(p, &p);
1150 return 0;
1151}
1152early_param("alloc_mptable", parse_alloc_mptable_opt);
944 1153
945 intsrc.mpc_type = MP_INTSRC; 1154void __init early_reserve_e820_mpc_new(void)
946 intsrc.mpc_irqflag = 0; /* Conforming */ 1155{
947 intsrc.mpc_srcbus = MP_ISA_BUS; 1156 if (enable_update_mptable && alloc_mptable) {
948#ifdef CONFIG_X86_IO_APIC 1157 u64 startt = 0;
949 intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid; 1158#ifdef CONFIG_X86_TRAMPOLINE
1159 startt = TRAMPOLINE_BASE;
950#endif 1160#endif
951 /* 1161 mpc_new_phys = early_reserve_e820(startt, mpc_new_length, 4);
952 * Use the default configuration for the IRQs 0-15. Unless
953 * overridden by (MADT) interrupt source override entries.
954 */
955 for (i = 0; i < 16; i++) {
956 int idx;
957
958 for (idx = 0; idx < mp_irq_entries; idx++) {
959 struct mpc_config_intsrc *irq = mp_irqs + idx;
960
961 /* Do we already have a mapping for this ISA IRQ? */
962 if (irq->mpc_srcbus == MP_ISA_BUS
963 && irq->mpc_srcbusirq == i)
964 break;
965
966 /* Do we already have a mapping for this IOAPIC pin */
967 if ((irq->mpc_dstapic == intsrc.mpc_dstapic) &&
968 (irq->mpc_dstirq == i))
969 break;
970 }
971
972 if (idx != mp_irq_entries) {
973 printk(KERN_DEBUG "ACPI: IRQ%d used by override.\n", i);
974 continue; /* IRQ already used */
975 }
976
977 intsrc.mpc_irqtype = mp_INT;
978 intsrc.mpc_srcbusirq = i; /* Identity mapped */
979 intsrc.mpc_dstirq = i;
980
981 MP_intsrc_info(&intsrc);
982 } 1162 }
983} 1163}
984 1164
985int mp_register_gsi(u32 gsi, int triggering, int polarity) 1165static int __init update_mp_table(void)
986{ 1166{
987 int ioapic; 1167 char str[16];
988 int ioapic_pin; 1168 char oem[10];
989#ifdef CONFIG_X86_32 1169 struct intel_mp_floating *mpf;
990#define MAX_GSI_NUM 4096 1170 struct mp_config_table *mpc;
991#define IRQ_COMPRESSION_START 64 1171 struct mp_config_table *mpc_new;
1172
1173 if (!enable_update_mptable)
1174 return 0;
1175
1176 mpf = mpf_found;
1177 if (!mpf)
1178 return 0;
992 1179
993 static int pci_irq = IRQ_COMPRESSION_START;
994 /* 1180 /*
995 * Mapping between Global System Interrupts, which 1181 * Now see if we need to go further.
996 * represent all possible interrupts, and IRQs
997 * assigned to actual devices.
998 */ 1182 */
999 static int gsi_to_irq[MAX_GSI_NUM]; 1183 if (mpf->mpf_feature1 != 0)
1000#else 1184 return 0;
1001
1002 if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC)
1003 return gsi;
1004#endif
1005 1185
1006 /* Don't set up the ACPI SCI because it's already set up */ 1186 if (!mpf->mpf_physptr)
1007 if (acpi_gbl_FADT.sci_interrupt == gsi) 1187 return 0;
1008 return gsi;
1009 1188
1010 ioapic = mp_find_ioapic(gsi); 1189 mpc = phys_to_virt(mpf->mpf_physptr);
1011 if (ioapic < 0) {
1012 printk(KERN_WARNING "No IOAPIC for GSI %u\n", gsi);
1013 return gsi;
1014 }
1015 1190
1016 ioapic_pin = gsi - mp_ioapic_routing[ioapic].gsi_base; 1191 if (!smp_check_mpc(mpc, oem, str))
1192 return 0;
1017 1193
1018#ifdef CONFIG_X86_32 1194 printk(KERN_INFO "mpf: %lx\n", virt_to_phys(mpf));
1019 if (ioapic_renumber_irq) 1195 printk(KERN_INFO "mpf_physptr: %x\n", mpf->mpf_physptr);
1020 gsi = ioapic_renumber_irq(ioapic, gsi);
1021#endif
1022 1196
1023 /* 1197 if (mpc_new_phys && mpc->mpc_length > mpc_new_length) {
1024 * Avoid pin reprogramming. PRTs typically include entries 1198 mpc_new_phys = 0;
1025 * with redundant pin->gsi mappings (but unique PCI devices); 1199 printk(KERN_INFO "mpc_new_length is %ld, please use alloc_mptable=8k\n",
1026 * we only program the IOAPIC on the first. 1200 mpc_new_length);
1027 */
1028 if (ioapic_pin > MP_MAX_IOAPIC_PIN) {
1029 printk(KERN_ERR "Invalid reference to IOAPIC pin "
1030 "%d-%d\n", mp_ioapic_routing[ioapic].apic_id,
1031 ioapic_pin);
1032 return gsi;
1033 } 1201 }
1034 if (test_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed)) { 1202
1035 Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n", 1203 if (!mpc_new_phys) {
1036 mp_ioapic_routing[ioapic].apic_id, ioapic_pin); 1204 unsigned char old, new;
1037#ifdef CONFIG_X86_32 1205 /* check if we can change the postion */
1038 return (gsi < IRQ_COMPRESSION_START ? gsi : gsi_to_irq[gsi]); 1206 mpc->mpc_checksum = 0;
1039#else 1207 old = mpf_checksum((unsigned char *)mpc, mpc->mpc_length);
1040 return gsi; 1208 mpc->mpc_checksum = 0xff;
1041#endif 1209 new = mpf_checksum((unsigned char *)mpc, mpc->mpc_length);
1210 if (old == new) {
1211 printk(KERN_INFO "mpc is readonly, please try alloc_mptable instead\n");
1212 return 0;
1213 }
1214 printk(KERN_INFO "use in-positon replacing\n");
1215 } else {
1216 mpf->mpf_physptr = mpc_new_phys;
1217 mpc_new = phys_to_virt(mpc_new_phys);
1218 memcpy(mpc_new, mpc, mpc->mpc_length);
1219 mpc = mpc_new;
1220 /* check if we can modify that */
1221 if (mpc_new_phys - mpf->mpf_physptr) {
1222 struct intel_mp_floating *mpf_new;
1223 /* steal 16 bytes from [0, 1k) */
1224 printk(KERN_INFO "mpf new: %x\n", 0x400 - 16);
1225 mpf_new = phys_to_virt(0x400 - 16);
1226 memcpy(mpf_new, mpf, 16);
1227 mpf = mpf_new;
1228 mpf->mpf_physptr = mpc_new_phys;
1229 }
1230 mpf->mpf_checksum = 0;
1231 mpf->mpf_checksum -= mpf_checksum((unsigned char *)mpf, 16);
1232 printk(KERN_INFO "mpf_physptr new: %x\n", mpf->mpf_physptr);
1042 } 1233 }
1043 1234
1044 set_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed);
1045#ifdef CONFIG_X86_32
1046 /* 1235 /*
1047 * For GSI >= 64, use IRQ compression 1236 * only replace the one with mp_INT and
1237 * MP_IRQ_TRIGGER_LEVEL|MP_IRQ_POLARITY_LOW,
1238 * already in mp_irqs, stored by ... and mp_config_acpi_gsi,
1239 * may need pci=routeirq for all coverage
1048 */ 1240 */
1049 if ((gsi >= IRQ_COMPRESSION_START) 1241 replace_intsrc_all(mpc, mpc_new_phys, mpc_new_length);
1050 && (triggering == ACPI_LEVEL_SENSITIVE)) { 1242
1051 /* 1243 return 0;
1052 * For PCI devices assign IRQs in order, avoiding gaps
1053 * due to unused I/O APIC pins.
1054 */
1055 int irq = gsi;
1056 if (gsi < MAX_GSI_NUM) {
1057 /*
1058 * Retain the VIA chipset work-around (gsi > 15), but
1059 * avoid a problem where the 8254 timer (IRQ0) is setup
1060 * via an override (so it's not on pin 0 of the ioapic),
1061 * and at the same time, the pin 0 interrupt is a PCI
1062 * type. The gsi > 15 test could cause these two pins
1063 * to be shared as IRQ0, and they are not shareable.
1064 * So test for this condition, and if necessary, avoid
1065 * the pin collision.
1066 */
1067 gsi = pci_irq++;
1068 /*
1069 * Don't assign IRQ used by ACPI SCI
1070 */
1071 if (gsi == acpi_gbl_FADT.sci_interrupt)
1072 gsi = pci_irq++;
1073 gsi_to_irq[irq] = gsi;
1074 } else {
1075 printk(KERN_ERR "GSI %u is too high\n", gsi);
1076 return gsi;
1077 }
1078 }
1079#endif
1080 io_apic_set_pci_routing(ioapic, ioapic_pin, gsi,
1081 triggering == ACPI_EDGE_SENSITIVE ? 0 : 1,
1082 polarity == ACPI_ACTIVE_HIGH ? 0 : 1);
1083 return gsi;
1084} 1244}
1085 1245
1086#endif /* CONFIG_X86_IO_APIC */ 1246late_initcall(update_mp_table);
1087#endif /* CONFIG_ACPI */
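The checksum dance in update_mp_table() above doubles as a write-probe: it stores 0 and then 0xff into the checksum byte and recomputes the sum each time; if both sums come out equal, the stores never landed and the MP table sits in read-only memory, so in-place replacement is refused. A user-space imitation, with a flag simulating ROM instead of real firmware memory:

	#include <stdio.h>

	static unsigned char checksum(const unsigned char *p, int len)
	{
		unsigned char sum = 0;

		while (len--)
			sum += *p++;
		return sum;
	}

	/* Emulate a ROM: stores are silently dropped. */
	static void store(unsigned char *p, unsigned char v, int rom)
	{
		if (!rom)
			*p = v;
	}

	static int writable(unsigned char *tbl, int len, int rom)
	{
		unsigned char old, new;

		/* Same probe as update_mp_table(): write two different
		 * values; if the checksum doesn't move, writes are lost. */
		store(&tbl[0], 0x00, rom);
		old = checksum(tbl, len);
		store(&tbl[0], 0xff, rom);
		new = checksum(tbl, len);
		return old != new;
	}

	int main(void)
	{
		unsigned char tbl[16] = { 0 };

		printf("RAM writable: %d (expected 1)\n", writable(tbl, 16, 0));
		printf("ROM writable: %d (expected 0)\n", writable(tbl, 16, 1));
		return 0;
	}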
diff --git a/arch/x86/kernel/nmi_32.c b/arch/x86/kernel/nmi.c
index 84160f74eeb0..716b89284be0 100644
--- a/arch/x86/kernel/nmi_32.c
+++ b/arch/x86/kernel/nmi.c
@@ -11,10 +11,13 @@
11 * Mikael Pettersson : PM converted to driver model. Disable/enable API. 11 * Mikael Pettersson : PM converted to driver model. Disable/enable API.
12 */ 12 */
13 13
14#include <asm/apic.h>
15
16#include <linux/nmi.h>
17#include <linux/mm.h>
14#include <linux/delay.h> 18#include <linux/delay.h>
15#include <linux/interrupt.h> 19#include <linux/interrupt.h>
16#include <linux/module.h> 20#include <linux/module.h>
17#include <linux/nmi.h>
18#include <linux/sysdev.h> 21#include <linux/sysdev.h>
19#include <linux/sysctl.h> 22#include <linux/sysctl.h>
20#include <linux/percpu.h> 23#include <linux/percpu.h>
@@ -22,12 +25,18 @@
22#include <linux/cpumask.h> 25#include <linux/cpumask.h>
23#include <linux/kernel_stat.h> 26#include <linux/kernel_stat.h>
24#include <linux/kdebug.h> 27#include <linux/kdebug.h>
25#include <linux/slab.h> 28#include <linux/smp.h>
26 29
30#include <asm/i8259.h>
31#include <asm/io_apic.h>
27#include <asm/smp.h> 32#include <asm/smp.h>
28#include <asm/nmi.h> 33#include <asm/nmi.h>
34#include <asm/proto.h>
35#include <asm/timer.h>
29 36
30#include "mach_traps.h" 37#include <asm/mce.h>
38
39#include <mach_traps.h>
31 40
32int unknown_nmi_panic; 41int unknown_nmi_panic;
33int nmi_watchdog_enabled; 42int nmi_watchdog_enabled;
@@ -41,28 +50,65 @@ static cpumask_t backtrace_mask = CPU_MASK_NONE;
41 * 0: the lapic NMI watchdog is disabled, but can be enabled 50 * 0: the lapic NMI watchdog is disabled, but can be enabled
42 */ 51 */
43atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */ 52atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */
53EXPORT_SYMBOL(nmi_active);
44 54
45unsigned int nmi_watchdog = NMI_DEFAULT; 55unsigned int nmi_watchdog = NMI_NONE;
46static unsigned int nmi_hz = HZ; 56EXPORT_SYMBOL(nmi_watchdog);
47 57
58static int panic_on_timeout;
59
60static unsigned int nmi_hz = HZ;
48static DEFINE_PER_CPU(short, wd_enabled); 61static DEFINE_PER_CPU(short, wd_enabled);
62static int endflag __initdata;
49 63
50static int endflag __initdata = 0; 64static inline unsigned int get_nmi_count(int cpu)
65{
66#ifdef CONFIG_X86_64
67 return cpu_pda(cpu)->__nmi_count;
68#else
69 return nmi_count(cpu);
70#endif
71}
72
73static inline int mce_in_progress(void)
74{
75#if defined(CONFIG_X86_64) && defined(CONFIG_X86_MCE)
76 return atomic_read(&mce_entry) > 0;
77#endif
78 return 0;
79}
80
81/*
82 * Take the local apic timer and PIT/HPET into account. We don't
83 * know which one is active when we have highres/dyntick on
84 */
85static inline unsigned int get_timer_irqs(int cpu)
86{
87#ifdef CONFIG_X86_64
88 return read_pda(apic_timer_irqs) + read_pda(irq0_irqs);
89#else
90 return per_cpu(irq_stat, cpu).apic_timer_irqs +
91 per_cpu(irq_stat, cpu).irq0_irqs;
92#endif
93}
51 94
52#ifdef CONFIG_SMP 95#ifdef CONFIG_SMP
53/* The performance counters used by NMI_LOCAL_APIC don't trigger when 96/*
97 * The performance counters used by NMI_LOCAL_APIC don't trigger when
54 * the CPU is idle. To make sure the NMI watchdog really ticks on all 98 * the CPU is idle. To make sure the NMI watchdog really ticks on all
55 * CPUs during the test make them busy. 99 * CPUs during the test make them busy.
56 */ 100 */
57static __init void nmi_cpu_busy(void *data) 101static __init void nmi_cpu_busy(void *data)
58{ 102{
59 local_irq_enable_in_hardirq(); 103 local_irq_enable_in_hardirq();
60 /* Intentionally don't use cpu_relax here. This is 104 /*
61 to make sure that the performance counter really ticks, 105 * Intentionally don't use cpu_relax here. This is
62 even if there is a simulator or similar that catches the 106 * to make sure that the performance counter really ticks,
63 pause instruction. On a real HT machine this is fine because 107 * even if there is a simulator or similar that catches the
64 all other CPUs are busy with "useless" delay loops and don't 108 * pause instruction. On a real HT machine this is fine because
65 care if they get somewhat less cycles. */ 109 * all other CPUs are busy with "useless" delay loops and don't
110 * care if they get somewhat less cycles.
111 */
66 while (endflag == 0) 112 while (endflag == 0)
67 mb(); 113 mb();
68} 114}
@@ -73,15 +119,12 @@ int __init check_nmi_watchdog(void)
73 unsigned int *prev_nmi_count; 119 unsigned int *prev_nmi_count;
74 int cpu; 120 int cpu;
75 121
76 if ((nmi_watchdog == NMI_NONE) || (nmi_watchdog == NMI_DISABLED)) 122 if (!nmi_watchdog_active() || !atomic_read(&nmi_active))
77 return 0;
78
79 if (!atomic_read(&nmi_active))
80 return 0; 123 return 0;
81 124
82 prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL); 125 prev_nmi_count = kmalloc(nr_cpu_ids * sizeof(int), GFP_KERNEL);
83 if (!prev_nmi_count) 126 if (!prev_nmi_count)
84 return -1; 127 goto error;
85 128
86 printk(KERN_INFO "Testing NMI watchdog ... "); 129 printk(KERN_INFO "Testing NMI watchdog ... ");
87 130
@@ -91,25 +134,19 @@ int __init check_nmi_watchdog(void)
91#endif 134#endif
92 135
93 for_each_possible_cpu(cpu) 136 for_each_possible_cpu(cpu)
94 prev_nmi_count[cpu] = nmi_count(cpu); 137 prev_nmi_count[cpu] = get_nmi_count(cpu);
95 local_irq_enable(); 138 local_irq_enable();
96 mdelay((20*1000)/nmi_hz); // wait 20 ticks 139 mdelay((20 * 1000) / nmi_hz); /* wait 20 ticks */
97 140
98 for_each_possible_cpu(cpu) { 141 for_each_online_cpu(cpu) {
99#ifdef CONFIG_SMP
100 /* Check cpu_callin_map here because that is set
101 after the timer is started. */
102 if (!cpu_isset(cpu, cpu_callin_map))
103 continue;
104#endif
105 if (!per_cpu(wd_enabled, cpu)) 142 if (!per_cpu(wd_enabled, cpu))
106 continue; 143 continue;
107 if (nmi_count(cpu) - prev_nmi_count[cpu] <= 5) { 144 if (get_nmi_count(cpu) - prev_nmi_count[cpu] <= 5) {
108 printk(KERN_WARNING "WARNING: CPU#%d: NMI " 145 printk(KERN_WARNING "WARNING: CPU#%d: NMI "
109 "appears to be stuck (%d->%d)!\n", 146 "appears to be stuck (%d->%d)!\n",
110 cpu, 147 cpu,
111 prev_nmi_count[cpu], 148 prev_nmi_count[cpu],
112 nmi_count(cpu)); 149 get_nmi_count(cpu));
113 per_cpu(wd_enabled, cpu) = 0; 150 per_cpu(wd_enabled, cpu) = 0;
114 atomic_dec(&nmi_active); 151 atomic_dec(&nmi_active);
115 } 152 }
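
check_nmi_watchdog() above is a plain rate test: snapshot every CPU's NMI count, let the watchdog run for 20 periods, and treat a CPU whose count advanced by five or fewer as stuck. The comparison step, reduced to a self-contained sketch (counts come from get_nmi_count() in the kernel; plain arrays stand in here):

    #include <stdio.h>

    #define NCPUS 4

    /* Returns how many CPUs' watchdog NMIs actually ticked. */
    static int count_working(const unsigned int prev[NCPUS],
                             const unsigned int now[NCPUS])
    {
            int cpu, working = 0;

            for (cpu = 0; cpu < NCPUS; cpu++) {
                    if (now[cpu] - prev[cpu] <= 5)  /* expected ~20 ticks */
                            printf("CPU#%d: NMI appears stuck (%u->%u)\n",
                                   cpu, prev[cpu], now[cpu]);
                    else
                            working++;
            }
            return working;
    }
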
@@ -118,37 +155,53 @@ int __init check_nmi_watchdog(void)
118 if (!atomic_read(&nmi_active)) { 155 if (!atomic_read(&nmi_active)) {
119 kfree(prev_nmi_count); 156 kfree(prev_nmi_count);
120 atomic_set(&nmi_active, -1); 157 atomic_set(&nmi_active, -1);
121 return -1; 158 goto error;
122 } 159 }
123 printk("OK.\n"); 160 printk("OK.\n");
124 161
125 /* now that we know it works we can reduce NMI frequency to 162 /*
126 something more reasonable; makes a difference in some configs */ 163 * now that we know it works we can reduce NMI frequency to
164 * something more reasonable; makes a difference in some configs
165 */
127 if (nmi_watchdog == NMI_LOCAL_APIC) 166 if (nmi_watchdog == NMI_LOCAL_APIC)
128 nmi_hz = lapic_adjust_nmi_hz(1); 167 nmi_hz = lapic_adjust_nmi_hz(1);
129 168
130 kfree(prev_nmi_count); 169 kfree(prev_nmi_count);
131 return 0; 170 return 0;
171error:
172 if (nmi_watchdog == NMI_IO_APIC && !timer_through_8259)
173 disable_8259A_irq(0);
174#ifdef CONFIG_X86_32
175 timer_ack = 0;
176#endif
177 return -1;
132} 178}
133 179
134static int __init setup_nmi_watchdog(char *str) 180static int __init setup_nmi_watchdog(char *str)
135{ 181{
136 int nmi; 182 unsigned int nmi;
183
184 if (!strncmp(str, "panic", 5)) {
185 panic_on_timeout = 1;
186 str = strchr(str, ',');
187 if (!str)
188 return 1;
189 ++str;
190 }
137 191
138 get_option(&str, &nmi); 192 get_option(&str, &nmi);
139 193
140 if ((nmi >= NMI_INVALID) || (nmi < NMI_NONE)) 194 if (nmi >= NMI_INVALID)
141 return 0; 195 return 0;
142 196
143 nmi_watchdog = nmi; 197 nmi_watchdog = nmi;
144 return 1; 198 return 1;
145} 199}
146
147__setup("nmi_watchdog=", setup_nmi_watchdog); 200__setup("nmi_watchdog=", setup_nmi_watchdog);
148 201
149 202/*
150/* Suspend/resume support */ 203 * Suspend/resume support
151 204 */
152#ifdef CONFIG_PM 205#ifdef CONFIG_PM
153 206
154static int nmi_pm_active; /* nmi_active before suspend */ 207static int nmi_pm_active; /* nmi_active before suspend */
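
setup_nmi_watchdog() above now accepts an optional "panic" prefix before the mode number, making a watchdog timeout fatal rather than just an oops. The parsing shape in portable C (the kernel's get_option() parses an integer and advances the cursor; strtol stands in for it here):

    #include <stdlib.h>
    #include <string.h>

    static int panic_on_timeout;

    /* Accepts "<n>", "panic" or "panic,<n>", like the boot option. */
    static int parse_nmi_watchdog(const char *str, unsigned int *mode)
    {
            if (!strncmp(str, "panic", 5)) {
                    panic_on_timeout = 1;
                    str = strchr(str, ',');
                    if (!str)
                            return 1;   /* bare "panic": keep current mode */
                    ++str;
            }
            *mode = (unsigned int)strtol(str, NULL, 0);
            return 1;
    }
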
@@ -172,7 +225,6 @@ static int lapic_nmi_resume(struct sys_device *dev)
172 return 0; 225 return 0;
173} 226}
174 227
175
176static struct sysdev_class nmi_sysclass = { 228static struct sysdev_class nmi_sysclass = {
177 .name = "lapic_nmi", 229 .name = "lapic_nmi",
178 .resume = lapic_nmi_resume, 230 .resume = lapic_nmi_resume,
@@ -188,7 +240,8 @@ static int __init init_lapic_nmi_sysfs(void)
188{ 240{
189 int error; 241 int error;
190 242
191 /* should really be a BUG_ON but b/c this is an 243 /*
244 * should really be a BUG_ON but b/c this is an
192 * init call, it just doesn't work. -dcz 245 * init call, it just doesn't work. -dcz
193 */ 246 */
194 if (nmi_watchdog != NMI_LOCAL_APIC) 247 if (nmi_watchdog != NMI_LOCAL_APIC)
@@ -202,6 +255,7 @@ static int __init init_lapic_nmi_sysfs(void)
202 error = sysdev_register(&device_lapic_nmi); 255 error = sysdev_register(&device_lapic_nmi);
203 return error; 256 return error;
204} 257}
258
205/* must come after the local APIC's device_initcall() */ 259/* must come after the local APIC's device_initcall() */
206late_initcall(init_lapic_nmi_sysfs); 260late_initcall(init_lapic_nmi_sysfs);
207 261
@@ -223,7 +277,7 @@ void acpi_nmi_enable(void)
223 277
224static void __acpi_nmi_disable(void *__unused) 278static void __acpi_nmi_disable(void *__unused)
225{ 279{
226 apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED); 280 apic_write_around(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED);
227} 281}
228 282
229/* 283/*
@@ -242,12 +296,13 @@ void setup_apic_nmi_watchdog(void *unused)
242 296
243 /* cheap hack to support suspend/resume */ 297 /* cheap hack to support suspend/resume */
244 /* if cpu0 is not active neither should the other cpus */ 298 /* if cpu0 is not active neither should the other cpus */
245 if ((smp_processor_id() != 0) && (atomic_read(&nmi_active) <= 0)) 299 if (smp_processor_id() != 0 && atomic_read(&nmi_active) <= 0)
246 return; 300 return;
247 301
248 switch (nmi_watchdog) { 302 switch (nmi_watchdog) {
249 case NMI_LOCAL_APIC: 303 case NMI_LOCAL_APIC:
250 __get_cpu_var(wd_enabled) = 1; /* enable it before to avoid race with handler */ 304 /* enable it before to avoid race with handler */
305 __get_cpu_var(wd_enabled) = 1;
251 if (lapic_watchdog_init(nmi_hz) < 0) { 306 if (lapic_watchdog_init(nmi_hz) < 0) {
252 __get_cpu_var(wd_enabled) = 0; 307 __get_cpu_var(wd_enabled) = 0;
253 return; 308 return;
@@ -262,9 +317,8 @@ void setup_apic_nmi_watchdog(void *unused)
262void stop_apic_nmi_watchdog(void *unused) 317void stop_apic_nmi_watchdog(void *unused)
263{ 318{
264 /* only support LOCAL and IO APICs for now */ 319 /* only support LOCAL and IO APICs for now */
265 if ((nmi_watchdog != NMI_LOCAL_APIC) && 320 if (!nmi_watchdog_active())
266 (nmi_watchdog != NMI_IO_APIC)) 321 return;
267 return;
268 if (__get_cpu_var(wd_enabled) == 0) 322 if (__get_cpu_var(wd_enabled) == 0)
269 return; 323 return;
270 if (nmi_watchdog == NMI_LOCAL_APIC) 324 if (nmi_watchdog == NMI_LOCAL_APIC)
@@ -284,26 +338,26 @@ void stop_apic_nmi_watchdog(void *unused)
284 * since NMIs don't listen to _any_ locks, we have to be extremely 338 * since NMIs don't listen to _any_ locks, we have to be extremely
285 * careful not to rely on unsafe variables. The printk might lock 339 * careful not to rely on unsafe variables. The printk might lock
286 * up though, so we have to break up any console locks first ... 340 * up though, so we have to break up any console locks first ...
287 * [when there will be more tty-related locks, break them up 341 * [when there will be more tty-related locks, break them up here too!]
288 * here too!]
289 */ 342 */
290 343
291static unsigned int 344static DEFINE_PER_CPU(unsigned, last_irq_sum);
292 last_irq_sums [NR_CPUS], 345static DEFINE_PER_CPU(local_t, alert_counter);
293 alert_counter [NR_CPUS]; 346static DEFINE_PER_CPU(int, nmi_touch);
294 347
295void touch_nmi_watchdog(void) 348void touch_nmi_watchdog(void)
296{ 349{
297 if (nmi_watchdog > 0) { 350 if (nmi_watchdog_active()) {
298 unsigned cpu; 351 unsigned cpu;
299 352
300 /* 353 /*
301 * Just reset the alert counters, (other CPUs might be 354 * Tell other CPUs to reset their alert counters. We cannot
302 * spinning on locks we hold): 355 * do it ourselves because the alert count increase is not
356 * atomic.
303 */ 357 */
304 for_each_present_cpu(cpu) { 358 for_each_present_cpu(cpu) {
305 if (alert_counter[cpu]) 359 if (per_cpu(nmi_touch, cpu) != 1)
306 alert_counter[cpu] = 0; 360 per_cpu(nmi_touch, cpu) = 1;
307 } 361 }
308 } 362 }
309 363
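
The rewrite above changes who may write the alert state: alert_counter is now a per-CPU local_t that only its owning CPU updates, so touch_nmi_watchdog() no longer clears remote counters directly and instead raises a per-CPU nmi_touch flag that each CPU consumes in its own NMI path. A user-space analogue of that ownership split (per-CPU data modeled as arrays; _Atomic stands in for the cross-CPU flag):

    #include <stdatomic.h>

    #define NCPUS 4

    static long       alert_counter[NCPUS]; /* written only by its owner */
    static atomic_int nmi_touch[NCPUS];     /* set remotely, cleared by owner */

    static void touch_all(void)             /* any CPU may call this */
    {
            for (int cpu = 0; cpu < NCPUS; cpu++)
                    atomic_store(&nmi_touch[cpu], 1);
    }

    static void owner_tick(int cpu)         /* runs in CPU-local NMI context */
    {
            if (atomic_exchange(&nmi_touch[cpu], 0))
                    alert_counter[cpu] = 0; /* safe: we own this slot */
            else
                    alert_counter[cpu]++;
    }
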
@@ -314,12 +368,9 @@ void touch_nmi_watchdog(void)
314} 368}
315EXPORT_SYMBOL(touch_nmi_watchdog); 369EXPORT_SYMBOL(touch_nmi_watchdog);
316 370
317extern void die_nmi(struct pt_regs *, const char *msg);
318
319notrace __kprobes int 371notrace __kprobes int
320nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) 372nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
321{ 373{
322
323 /* 374 /*
324 * Since current_thread_info()-> is always on the stack, and we 375 * Since current_thread_info()-> is always on the stack, and we
325 * always switch the stack NMI-atomically, it's safe to use 376 * always switch the stack NMI-atomically, it's safe to use
@@ -337,39 +388,45 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
337 touched = 1; 388 touched = 1;
338 } 389 }
339 390
391 sum = get_timer_irqs(cpu);
392
393 if (__get_cpu_var(nmi_touch)) {
394 __get_cpu_var(nmi_touch) = 0;
395 touched = 1;
396 }
397
340 if (cpu_isset(cpu, backtrace_mask)) { 398 if (cpu_isset(cpu, backtrace_mask)) {
341 static DEFINE_SPINLOCK(lock); /* Serialise the printks */ 399 static DEFINE_SPINLOCK(lock); /* Serialise the printks */
342 400
343 spin_lock(&lock); 401 spin_lock(&lock);
344 printk("NMI backtrace for cpu %d\n", cpu); 402 printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu);
345 dump_stack(); 403 dump_stack();
346 spin_unlock(&lock); 404 spin_unlock(&lock);
347 cpu_clear(cpu, backtrace_mask); 405 cpu_clear(cpu, backtrace_mask);
348 } 406 }
349 407
350 /*
351 * Take the local apic timer and PIT/HPET into account. We don't
352 * know which one is active, when we have highres/dyntick on
353 */
354 sum = per_cpu(irq_stat, cpu).apic_timer_irqs +
355 per_cpu(irq_stat, cpu).irq0_irqs;
408 /* Could check oops_in_progress here too, but it's safer not to */
409 if (mce_in_progress())
410 touched = 1;
356 411
357 /* if none of the timers is firing, this cpu isn't doing much */ 412 /* if none of the timers is firing, this cpu isn't doing much */
358 if (!touched && last_irq_sums[cpu] == sum) { 413 if (!touched && __get_cpu_var(last_irq_sum) == sum) {
359 /* 414 /*
360 * Ayiee, looks like this CPU is stuck ... 415 * Ayiee, looks like this CPU is stuck ...
361 * wait a few IRQs (5 seconds) before doing the oops ... 416 * wait a few IRQs (5 seconds) before doing the oops ...
362 */ 417 */
363 alert_counter[cpu]++; 418 local_inc(&__get_cpu_var(alert_counter));
364 if (alert_counter[cpu] == 5*nmi_hz) 419 if (local_read(&__get_cpu_var(alert_counter)) == 5 * nmi_hz)
365 /* 420 /*
366 * die_nmi will return ONLY if NOTIFY_STOP happens.. 421 * die_nmi will return ONLY if NOTIFY_STOP happens..
367 */ 422 */
368 die_nmi(regs, "BUG: NMI Watchdog detected LOCKUP"); 423 die_nmi("BUG: NMI Watchdog detected LOCKUP",
424 regs, panic_on_timeout);
369 } else { 425 } else {
370 last_irq_sums[cpu] = sum; 426 __get_cpu_var(last_irq_sum) = sum;
371 alert_counter[cpu] = 0; 427 local_set(&__get_cpu_var(alert_counter), 0);
372 } 428 }
429
373 /* see if the nmi watchdog went off */ 430 /* see if the nmi watchdog went off */
374 if (!__get_cpu_var(wd_enabled)) 431 if (!__get_cpu_var(wd_enabled))
375 return rc; 432 return rc;
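
The rule nmi_watchdog_tick() implements: if this CPU's timer-interrupt sum has not moved since the previous NMI and nothing touched the watchdog, record one strike; at 5 * nmi_hz consecutive strikes (about five seconds) declare a lockup. The state machine for one CPU, condensed (die() is a stand-in for die_nmi()):

    #include <stdio.h>
    #include <stdlib.h>

    static unsigned int last_irq_sum;
    static long alert_counter;
    static const unsigned int nmi_hz = 1;  /* NMIs per second after retune */

    static void die(const char *msg)
    {
            fprintf(stderr, "%s\n", msg);
            abort();
    }

    /* Called once per watchdog NMI with the current timer-IRQ sum. */
    static void watchdog_tick(unsigned int sum, int touched)
    {
            if (!touched && last_irq_sum == sum) {
                    if (++alert_counter == 5 * nmi_hz)
                            die("BUG: NMI Watchdog detected LOCKUP");
            } else {
                    last_irq_sum = sum;
                    alert_counter = 0;
            }
    }
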
@@ -378,7 +435,8 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
378 rc |= lapic_wd_event(nmi_hz); 435 rc |= lapic_wd_event(nmi_hz);
379 break; 436 break;
380 case NMI_IO_APIC: 437 case NMI_IO_APIC:
381 /* don't know how to accurately check for this. 438 /*
439 * don't know how to accurately check for this.
382 * just assume it was a watchdog timer interrupt 440 * just assume it was a watchdog timer interrupt
383 * This matches the old behaviour. 441 * This matches the old behaviour.
384 */ 442 */
@@ -396,7 +454,7 @@ static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu)
396 char buf[64]; 454 char buf[64];
397 455
398 sprintf(buf, "NMI received for unknown reason %02x\n", reason); 456 sprintf(buf, "NMI received for unknown reason %02x\n", reason);
399 die_nmi(regs, buf); 457 die_nmi(buf, regs, 1); /* Always panic here */
400 return 0; 458 return 0;
401} 459}
402 460
@@ -414,32 +472,26 @@ int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file,
414 if (!!old_state == !!nmi_watchdog_enabled) 472 if (!!old_state == !!nmi_watchdog_enabled)
415 return 0; 473 return 0;
416 474
417 if (atomic_read(&nmi_active) < 0 || nmi_watchdog == NMI_DISABLED) { 475 if (atomic_read(&nmi_active) < 0 || !nmi_watchdog_active()) {
418 printk( KERN_WARNING "NMI watchdog is permanently disabled\n"); 476 printk(KERN_WARNING
477 "NMI watchdog is permanently disabled\n");
419 return -EIO; 478 return -EIO;
420 } 479 }
421 480
422 if (nmi_watchdog == NMI_DEFAULT) {
423 if (lapic_watchdog_ok())
424 nmi_watchdog = NMI_LOCAL_APIC;
425 else
426 nmi_watchdog = NMI_IO_APIC;
427 }
428
429 if (nmi_watchdog == NMI_LOCAL_APIC) { 481 if (nmi_watchdog == NMI_LOCAL_APIC) {
430 if (nmi_watchdog_enabled) 482 if (nmi_watchdog_enabled)
431 enable_lapic_nmi_watchdog(); 483 enable_lapic_nmi_watchdog();
432 else 484 else
433 disable_lapic_nmi_watchdog(); 485 disable_lapic_nmi_watchdog();
434 } else { 486 } else {
435 printk( KERN_WARNING 487 printk(KERN_WARNING
436 "NMI watchdog doesn't know what hardware to touch\n"); 488 "NMI watchdog doesn't know what hardware to touch\n");
437 return -EIO; 489 return -EIO;
438 } 490 }
439 return 0; 491 return 0;
440} 492}
441 493
442#endif 494#endif /* CONFIG_SYSCTL */
443 495
444int do_nmi_callback(struct pt_regs *regs, int cpu) 496int do_nmi_callback(struct pt_regs *regs, int cpu)
445{ 497{
@@ -462,6 +514,3 @@ void __trigger_all_cpu_backtrace(void)
462 mdelay(1); 514 mdelay(1);
463 } 515 }
464} 516}
465
466EXPORT_SYMBOL(nmi_active);
467EXPORT_SYMBOL(nmi_watchdog);
diff --git a/arch/x86/kernel/nmi_64.c b/arch/x86/kernel/nmi_64.c
deleted file mode 100644
index 5a29ded994fa..000000000000
--- a/arch/x86/kernel/nmi_64.c
+++ /dev/null
@@ -1,482 +0,0 @@
1/*
2 * NMI watchdog support on APIC systems
3 *
4 * Started by Ingo Molnar <mingo@redhat.com>
5 *
6 * Fixes:
7 * Mikael Pettersson : AMD K7 support for local APIC NMI watchdog.
8 * Mikael Pettersson : Power Management for local APIC NMI watchdog.
9 * Pavel Machek and
10 * Mikael Pettersson : PM converted to driver model. Disable/enable API.
11 */
12
13#include <linux/nmi.h>
14#include <linux/mm.h>
15#include <linux/delay.h>
16#include <linux/interrupt.h>
17#include <linux/module.h>
18#include <linux/sysdev.h>
19#include <linux/sysctl.h>
20#include <linux/kprobes.h>
21#include <linux/cpumask.h>
22#include <linux/kdebug.h>
23
24#include <asm/smp.h>
25#include <asm/nmi.h>
26#include <asm/proto.h>
27#include <asm/mce.h>
28
29#include <mach_traps.h>
30
31int unknown_nmi_panic;
32int nmi_watchdog_enabled;
33int panic_on_unrecovered_nmi;
34
35static cpumask_t backtrace_mask = CPU_MASK_NONE;
36
37/* nmi_active:
38 * >0: the lapic NMI watchdog is active, but can be disabled
39 * <0: the lapic NMI watchdog has not been set up, and cannot
40 * be enabled
41 * 0: the lapic NMI watchdog is disabled, but can be enabled
42 */
43atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */
44static int panic_on_timeout;
45
46unsigned int nmi_watchdog = NMI_DEFAULT;
47static unsigned int nmi_hz = HZ;
48
49static DEFINE_PER_CPU(short, wd_enabled);
50
51/* Run after command line and cpu_init init, but before all other checks */
52void nmi_watchdog_default(void)
53{
54 if (nmi_watchdog != NMI_DEFAULT)
55 return;
56 nmi_watchdog = NMI_NONE;
57}
58
59static int endflag __initdata = 0;
60
61#ifdef CONFIG_SMP
62/* The performance counters used by NMI_LOCAL_APIC don't trigger when
63 * the CPU is idle. To make sure the NMI watchdog really ticks on all
64 * CPUs during the test make them busy.
65 */
66static __init void nmi_cpu_busy(void *data)
67{
68 local_irq_enable_in_hardirq();
69 /* Intentionally don't use cpu_relax here. This is
70 to make sure that the performance counter really ticks,
71 even if there is a simulator or similar that catches the
72 pause instruction. On a real HT machine this is fine because
73 all other CPUs are busy with "useless" delay loops and don't
74 care if they get somewhat less cycles. */
75 while (endflag == 0)
76 mb();
77}
78#endif
79
80int __init check_nmi_watchdog(void)
81{
82 int *prev_nmi_count;
83 int cpu;
84
85 if ((nmi_watchdog == NMI_NONE) || (nmi_watchdog == NMI_DISABLED))
86 return 0;
87
88 if (!atomic_read(&nmi_active))
89 return 0;
90
91 prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL);
92 if (!prev_nmi_count)
93 return -1;
94
95 printk(KERN_INFO "Testing NMI watchdog ... ");
96
97#ifdef CONFIG_SMP
98 if (nmi_watchdog == NMI_LOCAL_APIC)
99 smp_call_function(nmi_cpu_busy, (void *)&endflag, 0, 0);
100#endif
101
102 for (cpu = 0; cpu < NR_CPUS; cpu++)
103 prev_nmi_count[cpu] = cpu_pda(cpu)->__nmi_count;
104 local_irq_enable();
105 mdelay((20*1000)/nmi_hz); // wait 20 ticks
106
107 for_each_online_cpu(cpu) {
108 if (!per_cpu(wd_enabled, cpu))
109 continue;
110 if (cpu_pda(cpu)->__nmi_count - prev_nmi_count[cpu] <= 5) {
111 printk(KERN_WARNING "WARNING: CPU#%d: NMI "
112 "appears to be stuck (%d->%d)!\n",
113 cpu,
114 prev_nmi_count[cpu],
115 cpu_pda(cpu)->__nmi_count);
116 per_cpu(wd_enabled, cpu) = 0;
117 atomic_dec(&nmi_active);
118 }
119 }
120 endflag = 1;
121 if (!atomic_read(&nmi_active)) {
122 kfree(prev_nmi_count);
123 atomic_set(&nmi_active, -1);
124 return -1;
125 }
126 printk("OK.\n");
127
128 /* now that we know it works we can reduce NMI frequency to
129 something more reasonable; makes a difference in some configs */
130 if (nmi_watchdog == NMI_LOCAL_APIC)
131 nmi_hz = lapic_adjust_nmi_hz(1);
132
133 kfree(prev_nmi_count);
134 return 0;
135}
136
137static int __init setup_nmi_watchdog(char *str)
138{
139 int nmi;
140
141 if (!strncmp(str,"panic",5)) {
142 panic_on_timeout = 1;
143 str = strchr(str, ',');
144 if (!str)
145 return 1;
146 ++str;
147 }
148
149 get_option(&str, &nmi);
150
151 if ((nmi >= NMI_INVALID) || (nmi < NMI_NONE))
152 return 0;
153
154 nmi_watchdog = nmi;
155 return 1;
156}
157
158__setup("nmi_watchdog=", setup_nmi_watchdog);
159
160#ifdef CONFIG_PM
161
162static int nmi_pm_active; /* nmi_active before suspend */
163
164static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state)
165{
166 /* only CPU0 goes here, other CPUs should be offline */
167 nmi_pm_active = atomic_read(&nmi_active);
168 stop_apic_nmi_watchdog(NULL);
169 BUG_ON(atomic_read(&nmi_active) != 0);
170 return 0;
171}
172
173static int lapic_nmi_resume(struct sys_device *dev)
174{
175 /* only CPU0 goes here, other CPUs should be offline */
176 if (nmi_pm_active > 0) {
177 setup_apic_nmi_watchdog(NULL);
178 touch_nmi_watchdog();
179 }
180 return 0;
181}
182
183static struct sysdev_class nmi_sysclass = {
184 .name = "lapic_nmi",
185 .resume = lapic_nmi_resume,
186 .suspend = lapic_nmi_suspend,
187};
188
189static struct sys_device device_lapic_nmi = {
190 .id = 0,
191 .cls = &nmi_sysclass,
192};
193
194static int __init init_lapic_nmi_sysfs(void)
195{
196 int error;
197
198 /* should really be a BUG_ON but b/c this is an
199 * init call, it just doesn't work. -dcz
200 */
201 if (nmi_watchdog != NMI_LOCAL_APIC)
202 return 0;
203
204 if (atomic_read(&nmi_active) < 0)
205 return 0;
206
207 error = sysdev_class_register(&nmi_sysclass);
208 if (!error)
209 error = sysdev_register(&device_lapic_nmi);
210 return error;
211}
212/* must come after the local APIC's device_initcall() */
213late_initcall(init_lapic_nmi_sysfs);
214
215#endif /* CONFIG_PM */
216
217static void __acpi_nmi_enable(void *__unused)
218{
219 apic_write(APIC_LVT0, APIC_DM_NMI);
220}
221
222/*
223 * Enable timer based NMIs on all CPUs:
224 */
225void acpi_nmi_enable(void)
226{
227 if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
228 on_each_cpu(__acpi_nmi_enable, NULL, 0, 1);
229}
230
231static void __acpi_nmi_disable(void *__unused)
232{
233 apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED);
234}
235
236/*
237 * Disable timer based NMIs on all CPUs:
238 */
239void acpi_nmi_disable(void)
240{
241 if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
242 on_each_cpu(__acpi_nmi_disable, NULL, 0, 1);
243}
244
245void setup_apic_nmi_watchdog(void *unused)
246{
247 if (__get_cpu_var(wd_enabled))
248 return;
249
250 /* cheap hack to support suspend/resume */
251 /* if cpu0 is not active neither should the other cpus */
252 if ((smp_processor_id() != 0) && (atomic_read(&nmi_active) <= 0))
253 return;
254
255 switch (nmi_watchdog) {
256 case NMI_LOCAL_APIC:
257 __get_cpu_var(wd_enabled) = 1;
258 if (lapic_watchdog_init(nmi_hz) < 0) {
259 __get_cpu_var(wd_enabled) = 0;
260 return;
261 }
262 /* FALL THROUGH */
263 case NMI_IO_APIC:
264 __get_cpu_var(wd_enabled) = 1;
265 atomic_inc(&nmi_active);
266 }
267}
268
269void stop_apic_nmi_watchdog(void *unused)
270{
271 /* only support LOCAL and IO APICs for now */
272 if ((nmi_watchdog != NMI_LOCAL_APIC) &&
273 (nmi_watchdog != NMI_IO_APIC))
274 return;
275 if (__get_cpu_var(wd_enabled) == 0)
276 return;
277 if (nmi_watchdog == NMI_LOCAL_APIC)
278 lapic_watchdog_stop();
279 __get_cpu_var(wd_enabled) = 0;
280 atomic_dec(&nmi_active);
281}
282
283/*
284 * the best way to detect whether a CPU has a 'hard lockup' problem
285 * is to check it's local APIC timer IRQ counts. If they are not
286 * changing then that CPU has some problem.
287 *
288 * as these watchdog NMI IRQs are generated on every CPU, we only
289 * have to check the current processor.
290 */
291
292static DEFINE_PER_CPU(unsigned, last_irq_sum);
293static DEFINE_PER_CPU(local_t, alert_counter);
294static DEFINE_PER_CPU(int, nmi_touch);
295
296void touch_nmi_watchdog(void)
297{
298 if (nmi_watchdog > 0) {
299 unsigned cpu;
300
301 /*
302 * Tell other CPUs to reset their alert counters. We cannot
303 * do it ourselves because the alert count increase is not
304 * atomic.
305 */
306 for_each_present_cpu(cpu) {
307 if (per_cpu(nmi_touch, cpu) != 1)
308 per_cpu(nmi_touch, cpu) = 1;
309 }
310 }
311
312 touch_softlockup_watchdog();
313}
314EXPORT_SYMBOL(touch_nmi_watchdog);
315
316notrace __kprobes int
317nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
318{
319 int sum;
320 int touched = 0;
321 int cpu = smp_processor_id();
322 int rc = 0;
323
324 /* check for other users first */
325 if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
326 == NOTIFY_STOP) {
327 rc = 1;
328 touched = 1;
329 }
330
331 sum = read_pda(apic_timer_irqs) + read_pda(irq0_irqs);
332 if (__get_cpu_var(nmi_touch)) {
333 __get_cpu_var(nmi_touch) = 0;
334 touched = 1;
335 }
336
337 if (cpu_isset(cpu, backtrace_mask)) {
338 static DEFINE_SPINLOCK(lock); /* Serialise the printks */
339
340 spin_lock(&lock);
341 printk("NMI backtrace for cpu %d\n", cpu);
342 dump_stack();
343 spin_unlock(&lock);
344 cpu_clear(cpu, backtrace_mask);
345 }
346
347#ifdef CONFIG_X86_MCE
348 /* Could check oops_in_progress here too, but it's safer
349 not to */
350 if (atomic_read(&mce_entry) > 0)
351 touched = 1;
352#endif
353 /* if the apic timer isn't firing, this cpu isn't doing much */
354 if (!touched && __get_cpu_var(last_irq_sum) == sum) {
355 /*
356 * Ayiee, looks like this CPU is stuck ...
357 * wait a few IRQs (5 seconds) before doing the oops ...
358 */
359 local_inc(&__get_cpu_var(alert_counter));
360 if (local_read(&__get_cpu_var(alert_counter)) == 5*nmi_hz)
361 die_nmi("NMI Watchdog detected LOCKUP on CPU %d\n", regs,
362 panic_on_timeout);
363 } else {
364 __get_cpu_var(last_irq_sum) = sum;
365 local_set(&__get_cpu_var(alert_counter), 0);
366 }
367
368 /* see if the nmi watchdog went off */
369 if (!__get_cpu_var(wd_enabled))
370 return rc;
371 switch (nmi_watchdog) {
372 case NMI_LOCAL_APIC:
373 rc |= lapic_wd_event(nmi_hz);
374 break;
375 case NMI_IO_APIC:
376 /* don't know how to accurately check for this.
377 * just assume it was a watchdog timer interrupt
378 * This matches the old behaviour.
379 */
380 rc = 1;
381 break;
382 }
383 return rc;
384}
385
386static unsigned ignore_nmis;
387
388asmlinkage notrace __kprobes void
389do_nmi(struct pt_regs *regs, long error_code)
390{
391 nmi_enter();
392 add_pda(__nmi_count,1);
393 if (!ignore_nmis)
394 default_do_nmi(regs);
395 nmi_exit();
396}
397
398void stop_nmi(void)
399{
400 acpi_nmi_disable();
401 ignore_nmis++;
402}
403
404void restart_nmi(void)
405{
406 ignore_nmis--;
407 acpi_nmi_enable();
408}
409
410#ifdef CONFIG_SYSCTL
411
412static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu)
413{
414 unsigned char reason = get_nmi_reason();
415 char buf[64];
416
417 sprintf(buf, "NMI received for unknown reason %02x\n", reason);
418 die_nmi(buf, regs, 1); /* Always panic here */
419 return 0;
420}
421
422/*
423 * proc handler for /proc/sys/kernel/nmi
424 */
425int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file,
426 void __user *buffer, size_t *length, loff_t *ppos)
427{
428 int old_state;
429
430 nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0;
431 old_state = nmi_watchdog_enabled;
432 proc_dointvec(table, write, file, buffer, length, ppos);
433 if (!!old_state == !!nmi_watchdog_enabled)
434 return 0;
435
436 if (atomic_read(&nmi_active) < 0 || nmi_watchdog == NMI_DISABLED) {
437 printk( KERN_WARNING "NMI watchdog is permanently disabled\n");
438 return -EIO;
439 }
440
441 /* if nmi_watchdog is not set yet, then set it */
442 nmi_watchdog_default();
443
444 if (nmi_watchdog == NMI_LOCAL_APIC) {
445 if (nmi_watchdog_enabled)
446 enable_lapic_nmi_watchdog();
447 else
448 disable_lapic_nmi_watchdog();
449 } else {
450 printk( KERN_WARNING
451 "NMI watchdog doesn't know what hardware to touch\n");
452 return -EIO;
453 }
454 return 0;
455}
456
457#endif
458
459int do_nmi_callback(struct pt_regs *regs, int cpu)
460{
461#ifdef CONFIG_SYSCTL
462 if (unknown_nmi_panic)
463 return unknown_nmi_panic_callback(regs, cpu);
464#endif
465 return 0;
466}
467
468void __trigger_all_cpu_backtrace(void)
469{
470 int i;
471
472 backtrace_mask = cpu_online_map;
473 /* Wait for up to 10 seconds for all CPUs to do the backtrace */
474 for (i = 0; i < 10 * 1000; i++) {
475 if (cpus_empty(backtrace_mask))
476 break;
477 mdelay(1);
478 }
479}
480
481EXPORT_SYMBOL(nmi_active);
482EXPORT_SYMBOL(nmi_watchdog);
diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c
index e65281b1634b..f0f1de1c4a1d 100644
--- a/arch/x86/kernel/numaq_32.c
+++ b/arch/x86/kernel/numaq_32.c
@@ -31,6 +31,8 @@
31#include <asm/numaq.h> 31#include <asm/numaq.h>
32#include <asm/topology.h> 32#include <asm/topology.h>
33#include <asm/processor.h> 33#include <asm/processor.h>
34#include <asm/mpspec.h>
35#include <asm/e820.h>
34 36
35#define MB_TO_PAGES(addr) ((addr) << (20 - PAGE_SHIFT)) 37#define MB_TO_PAGES(addr) ((addr) << (20 - PAGE_SHIFT))
36 38
@@ -58,6 +60,8 @@ static void __init smp_dump_qct(void)
58 node_end_pfn[node] = MB_TO_PAGES( 60 node_end_pfn[node] = MB_TO_PAGES(
59 eq->hi_shrd_mem_start + eq->hi_shrd_mem_size); 61 eq->hi_shrd_mem_start + eq->hi_shrd_mem_size);
60 62
63 e820_register_active_regions(node, node_start_pfn[node],
64 node_end_pfn[node]);
61 memory_present(node, 65 memory_present(node,
62 node_start_pfn[node], node_end_pfn[node]); 66 node_start_pfn[node], node_end_pfn[node]);
63 node_remap_size[node] = node_memmap_size_bytes(node, 67 node_remap_size[node] = node_memmap_size_bytes(node,
@@ -67,13 +71,24 @@ static void __init smp_dump_qct(void)
67 } 71 }
68} 72}
69 73
70/*
71 * Unlike Summit, we don't really care to let the NUMA-Q
72 * fall back to flat mode. Don't compile for NUMA-Q
73 * unless you really need it!
74 */
74static __init void early_check_numaq(void)
75{
76 /*
77 * Find possible boot-time SMP configuration:
78 */
79 early_find_smp_config();
80 /*
81 * get boot-time SMP configuration:
82 */
83 if (smp_found_config)
84 early_get_smp_config();
85}
86
75int __init get_memcfg_numaq(void) 87int __init get_memcfg_numaq(void)
76{ 88{
89 early_check_numaq();
90 if (!found_numaq)
91 return 0;
77 smp_dump_qct(); 92 smp_dump_qct();
78 return 1; 93 return 1;
79} 94}
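
smp_dump_qct() records node spans through MB_TO_PAGES(addr), i.e. (addr) << (20 - PAGE_SHIFT): a megabyte count shifted into page-frame numbers. With 4 KB pages (PAGE_SHIFT = 12) that is a shift by 8, so one megabyte is 256 frames. A quick check of the arithmetic:

    #include <assert.h>

    #define PAGE_SHIFT 12                   /* 4 KB pages, as on x86 */
    #define MB_TO_PAGES(addr) ((addr) << (20 - PAGE_SHIFT))

    int main(void)
    {
            assert(MB_TO_PAGES(1UL) == 256);        /* 1 MB = 256 frames */
            assert(MB_TO_PAGES(1024UL) == 262144);  /* 1 GB = 256 Ki frames */
            return 0;
    }
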
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 74f0c5ea2a03..e0f571d58c19 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -30,6 +30,7 @@
30#include <asm/setup.h> 30#include <asm/setup.h>
31#include <asm/arch_hooks.h> 31#include <asm/arch_hooks.h>
32#include <asm/time.h> 32#include <asm/time.h>
33#include <asm/pgalloc.h>
33#include <asm/irq.h> 34#include <asm/irq.h>
34#include <asm/delay.h> 35#include <asm/delay.h>
35#include <asm/fixmap.h> 36#include <asm/fixmap.h>
@@ -139,7 +140,9 @@ unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf,
139 /* If the operation is a nop, then nop the callsite */ 140 /* If the operation is a nop, then nop the callsite */
140 ret = paravirt_patch_nop(); 141 ret = paravirt_patch_nop();
141 else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) || 142 else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) ||
142 type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_syscall_ret)) 143 type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit) ||
144 type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret32) ||
145 type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret64))
143 /* If operation requires a jmp, then jmp */ 146 /* If operation requires a jmp, then jmp */
144 ret = paravirt_patch_jmp(insnbuf, opfunc, addr, len); 147 ret = paravirt_patch_jmp(insnbuf, opfunc, addr, len);
145 else 148 else
@@ -190,7 +193,9 @@ static void native_flush_tlb_single(unsigned long addr)
190 193
191/* These are in entry.S */ 194/* These are in entry.S */
192extern void native_iret(void); 195extern void native_iret(void);
193extern void native_irq_enable_syscall_ret(void); 196extern void native_irq_enable_sysexit(void);
197extern void native_usergs_sysret32(void);
198extern void native_usergs_sysret64(void);
194 199
195static int __init print_banner(void) 200static int __init print_banner(void)
196{ 201{
@@ -280,7 +285,7 @@ struct pv_time_ops pv_time_ops = {
280 .get_wallclock = native_get_wallclock, 285 .get_wallclock = native_get_wallclock,
281 .set_wallclock = native_set_wallclock, 286 .set_wallclock = native_set_wallclock,
282 .sched_clock = native_sched_clock, 287 .sched_clock = native_sched_clock,
283 .get_cpu_khz = native_calculate_cpu_khz, 288 .get_tsc_khz = native_calibrate_tsc,
284}; 289};
285 290
286struct pv_irq_ops pv_irq_ops = { 291struct pv_irq_ops pv_irq_ops = {
@@ -291,6 +296,9 @@ struct pv_irq_ops pv_irq_ops = {
291 .irq_enable = native_irq_enable, 296 .irq_enable = native_irq_enable,
292 .safe_halt = native_safe_halt, 297 .safe_halt = native_safe_halt,
293 .halt = native_halt, 298 .halt = native_halt,
299#ifdef CONFIG_X86_64
300 .adjust_exception_frame = paravirt_nop,
301#endif
294}; 302};
295 303
296struct pv_cpu_ops pv_cpu_ops = { 304struct pv_cpu_ops pv_cpu_ops = {
@@ -321,12 +329,23 @@ struct pv_cpu_ops pv_cpu_ops = {
321 .store_idt = native_store_idt, 329 .store_idt = native_store_idt,
322 .store_tr = native_store_tr, 330 .store_tr = native_store_tr,
323 .load_tls = native_load_tls, 331 .load_tls = native_load_tls,
332#ifdef CONFIG_X86_64
333 .load_gs_index = native_load_gs_index,
334#endif
324 .write_ldt_entry = native_write_ldt_entry, 335 .write_ldt_entry = native_write_ldt_entry,
325 .write_gdt_entry = native_write_gdt_entry, 336 .write_gdt_entry = native_write_gdt_entry,
326 .write_idt_entry = native_write_idt_entry, 337 .write_idt_entry = native_write_idt_entry,
327 .load_sp0 = native_load_sp0, 338 .load_sp0 = native_load_sp0,
328 339
329 .irq_enable_syscall_ret = native_irq_enable_syscall_ret, 340#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
341 .irq_enable_sysexit = native_irq_enable_sysexit,
342#endif
343#ifdef CONFIG_X86_64
344#ifdef CONFIG_IA32_EMULATION
345 .usergs_sysret32 = native_usergs_sysret32,
346#endif
347 .usergs_sysret64 = native_usergs_sysret64,
348#endif
330 .iret = native_iret, 349 .iret = native_iret,
331 .swapgs = native_swapgs, 350 .swapgs = native_swapgs,
332 351
@@ -366,6 +385,9 @@ struct pv_mmu_ops pv_mmu_ops = {
366 .flush_tlb_single = native_flush_tlb_single, 385 .flush_tlb_single = native_flush_tlb_single,
367 .flush_tlb_others = native_flush_tlb_others, 386 .flush_tlb_others = native_flush_tlb_others,
368 387
388 .pgd_alloc = __paravirt_pgd_alloc,
389 .pgd_free = paravirt_nop,
390
369 .alloc_pte = paravirt_nop, 391 .alloc_pte = paravirt_nop,
370 .alloc_pmd = paravirt_nop, 392 .alloc_pmd = paravirt_nop,
371 .alloc_pmd_clone = paravirt_nop, 393 .alloc_pmd_clone = paravirt_nop,
@@ -380,6 +402,9 @@ struct pv_mmu_ops pv_mmu_ops = {
380 .pte_update = paravirt_nop, 402 .pte_update = paravirt_nop,
381 .pte_update_defer = paravirt_nop, 403 .pte_update_defer = paravirt_nop,
382 404
405 .ptep_modify_prot_start = __ptep_modify_prot_start,
406 .ptep_modify_prot_commit = __ptep_modify_prot_commit,
407
383#ifdef CONFIG_HIGHPTE 408#ifdef CONFIG_HIGHPTE
384 .kmap_atomic_pte = kmap_atomic, 409 .kmap_atomic_pte = kmap_atomic,
385#endif 410#endif
@@ -403,6 +428,7 @@ struct pv_mmu_ops pv_mmu_ops = {
403#endif /* PAGETABLE_LEVELS >= 3 */ 428#endif /* PAGETABLE_LEVELS >= 3 */
404 429
405 .pte_val = native_pte_val, 430 .pte_val = native_pte_val,
431 .pte_flags = native_pte_val,
406 .pgd_val = native_pgd_val, 432 .pgd_val = native_pgd_val,
407 433
408 .make_pte = native_make_pte, 434 .make_pte = native_make_pte,
@@ -416,6 +442,8 @@ struct pv_mmu_ops pv_mmu_ops = {
416 .enter = paravirt_nop, 442 .enter = paravirt_nop,
417 .leave = paravirt_nop, 443 .leave = paravirt_nop,
418 }, 444 },
445
446 .set_fixmap = native_set_fixmap,
419}; 447};
420 448
421EXPORT_SYMBOL_GPL(pv_time_ops); 449EXPORT_SYMBOL_GPL(pv_time_ops);
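
pv_time_ops, pv_irq_ops, pv_cpu_ops and pv_mmu_ops are tables of function pointers pre-filled with the native implementations; a hypervisor backend overwrites only the slots it needs to intercept. The shape of the pattern as a reduced sketch (types and names here are illustrative, not the kernel's):

    /* Native defaults; a guest backend overrides only what it must. */
    struct cpu_ops_sketch {
            void (*iret)(void);
            void (*swapgs)(void);
    };

    static void native_iret_sketch(void)   { /* "iretq" in real code  */ }
    static void native_swapgs_sketch(void) { /* "swapgs" in real code */ }

    static struct cpu_ops_sketch cpu_ops = {
            .iret   = native_iret_sketch,
            .swapgs = native_swapgs_sketch,
    };

    static void hypervisor_install(void (*guest_iret)(void))
    {
            cpu_ops.iret = guest_iret;      /* swapgs stays native */
    }
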
diff --git a/arch/x86/kernel/paravirt_patch_32.c b/arch/x86/kernel/paravirt_patch_32.c
index 82fc5fcab4f4..58262218781b 100644
--- a/arch/x86/kernel/paravirt_patch_32.c
+++ b/arch/x86/kernel/paravirt_patch_32.c
@@ -5,7 +5,7 @@ DEF_NATIVE(pv_irq_ops, irq_enable, "sti");
5DEF_NATIVE(pv_irq_ops, restore_fl, "push %eax; popf"); 5DEF_NATIVE(pv_irq_ops, restore_fl, "push %eax; popf");
6DEF_NATIVE(pv_irq_ops, save_fl, "pushf; pop %eax"); 6DEF_NATIVE(pv_irq_ops, save_fl, "pushf; pop %eax");
7DEF_NATIVE(pv_cpu_ops, iret, "iret"); 7DEF_NATIVE(pv_cpu_ops, iret, "iret");
8DEF_NATIVE(pv_cpu_ops, irq_enable_syscall_ret, "sti; sysexit"); 8DEF_NATIVE(pv_cpu_ops, irq_enable_sysexit, "sti; sysexit");
9DEF_NATIVE(pv_mmu_ops, read_cr2, "mov %cr2, %eax"); 9DEF_NATIVE(pv_mmu_ops, read_cr2, "mov %cr2, %eax");
10DEF_NATIVE(pv_mmu_ops, write_cr3, "mov %eax, %cr3"); 10DEF_NATIVE(pv_mmu_ops, write_cr3, "mov %eax, %cr3");
11DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax"); 11DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax");
@@ -29,7 +29,7 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
29 PATCH_SITE(pv_irq_ops, restore_fl); 29 PATCH_SITE(pv_irq_ops, restore_fl);
30 PATCH_SITE(pv_irq_ops, save_fl); 30 PATCH_SITE(pv_irq_ops, save_fl);
31 PATCH_SITE(pv_cpu_ops, iret); 31 PATCH_SITE(pv_cpu_ops, iret);
32 PATCH_SITE(pv_cpu_ops, irq_enable_syscall_ret); 32 PATCH_SITE(pv_cpu_ops, irq_enable_sysexit);
33 PATCH_SITE(pv_mmu_ops, read_cr2); 33 PATCH_SITE(pv_mmu_ops, read_cr2);
34 PATCH_SITE(pv_mmu_ops, read_cr3); 34 PATCH_SITE(pv_mmu_ops, read_cr3);
35 PATCH_SITE(pv_mmu_ops, write_cr3); 35 PATCH_SITE(pv_mmu_ops, write_cr3);
diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c
index 7d904e138d7e..061d01df9ae6 100644
--- a/arch/x86/kernel/paravirt_patch_64.c
+++ b/arch/x86/kernel/paravirt_patch_64.c
@@ -14,8 +14,9 @@ DEF_NATIVE(pv_mmu_ops, flush_tlb_single, "invlpg (%rdi)");
14DEF_NATIVE(pv_cpu_ops, clts, "clts"); 14DEF_NATIVE(pv_cpu_ops, clts, "clts");
15DEF_NATIVE(pv_cpu_ops, wbinvd, "wbinvd"); 15DEF_NATIVE(pv_cpu_ops, wbinvd, "wbinvd");
16 16
17/* the three commands give us more control to how to return from a syscall */ 17DEF_NATIVE(pv_cpu_ops, irq_enable_sysexit, "swapgs; sti; sysexit");
18DEF_NATIVE(pv_cpu_ops, irq_enable_syscall_ret, "movq %gs:" __stringify(pda_oldrsp) ", %rsp; swapgs; sysretq;"); 18DEF_NATIVE(pv_cpu_ops, usergs_sysret64, "swapgs; sysretq");
19DEF_NATIVE(pv_cpu_ops, usergs_sysret32, "swapgs; sysretl");
19DEF_NATIVE(pv_cpu_ops, swapgs, "swapgs"); 20DEF_NATIVE(pv_cpu_ops, swapgs, "swapgs");
20 21
21unsigned native_patch(u8 type, u16 clobbers, void *ibuf, 22unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
@@ -35,7 +36,9 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
35 PATCH_SITE(pv_irq_ops, irq_enable); 36 PATCH_SITE(pv_irq_ops, irq_enable);
36 PATCH_SITE(pv_irq_ops, irq_disable); 37 PATCH_SITE(pv_irq_ops, irq_disable);
37 PATCH_SITE(pv_cpu_ops, iret); 38 PATCH_SITE(pv_cpu_ops, iret);
38 PATCH_SITE(pv_cpu_ops, irq_enable_syscall_ret); 39 PATCH_SITE(pv_cpu_ops, irq_enable_sysexit);
40 PATCH_SITE(pv_cpu_ops, usergs_sysret32);
41 PATCH_SITE(pv_cpu_ops, usergs_sysret64);
39 PATCH_SITE(pv_cpu_ops, swapgs); 42 PATCH_SITE(pv_cpu_ops, swapgs);
40 PATCH_SITE(pv_mmu_ops, read_cr2); 43 PATCH_SITE(pv_mmu_ops, read_cr2);
41 PATCH_SITE(pv_mmu_ops, read_cr3); 44 PATCH_SITE(pv_mmu_ops, read_cr3);
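
Each DEF_NATIVE()/PATCH_SITE() pair hands the patcher the literal native instruction bytes to splice over an indirect call site when the native op is in use, e.g. "swapgs; sysretq" for usergs_sysret64. In miniature, the lookup-and-copy it enables (the table below is illustrative; only swapgs's 0F 01 F8 encoding is real):

    #include <stddef.h>
    #include <string.h>

    struct native_insn {
            const char          *name;
            const unsigned char *bytes;
            size_t               len;
    };

    static const unsigned char insn_swapgs[] = { 0x0f, 0x01, 0xf8 };

    static const struct native_insn sites[] = {
            { "swapgs", insn_swapgs, sizeof(insn_swapgs) },
    };

    /* Copy the native bytes over the call site if they fit; the real
     * patcher falls back to an emitted call/jmp when they do not. */
    static size_t patch_site(const char *name, unsigned char *site,
                             size_t room)
    {
            size_t i;

            for (i = 0; i < sizeof(sites) / sizeof(sites[0]); i++) {
                    if (strcmp(sites[i].name, name) || sites[i].len > room)
                            continue;
                    memcpy(site, sites[i].bytes, sites[i].len);
                    return sites[i].len;
            }
            return 0;
    }
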
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index e28ec497e142..6959b5c45df4 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -1394,7 +1394,7 @@ void __init detect_calgary(void)
1394 return; 1394 return;
1395 } 1395 }
1396 1396
1397 specified_table_size = determine_tce_table_size(end_pfn * PAGE_SIZE); 1397 specified_table_size = determine_tce_table_size(max_pfn * PAGE_SIZE);
1398 1398
1399 for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) { 1399 for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) {
1400 struct calgary_bus_info *info = &bus_info[bus]; 1400 struct calgary_bus_info *info = &bus_info[bus];
@@ -1459,7 +1459,7 @@ int __init calgary_iommu_init(void)
1459 if (ret) { 1459 if (ret) {
1460 printk(KERN_ERR "PCI-DMA: Calgary init failed %d, " 1460 printk(KERN_ERR "PCI-DMA: Calgary init failed %d, "
1461 "falling back to no_iommu\n", ret); 1461 "falling back to no_iommu\n", ret);
1462 if (end_pfn > MAX_DMA32_PFN) 1462 if (max_pfn > MAX_DMA32_PFN)
1463 printk(KERN_ERR "WARNING more than 4GB of memory, " 1463 printk(KERN_ERR "WARNING more than 4GB of memory, "
1464 "32bit PCI may malfunction.\n"); 1464 "32bit PCI may malfunction.\n");
1465 return ret; 1465 return ret;
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index dc00a1331ace..8467ec2320f1 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -7,6 +7,7 @@
7#include <asm/dma.h> 7#include <asm/dma.h>
8#include <asm/gart.h> 8#include <asm/gart.h>
9#include <asm/calgary.h> 9#include <asm/calgary.h>
10#include <asm/amd_iommu.h>
10 11
11int forbid_dac __read_mostly; 12int forbid_dac __read_mostly;
12EXPORT_SYMBOL(forbid_dac); 13EXPORT_SYMBOL(forbid_dac);
@@ -74,13 +75,17 @@ early_param("dma32_size", parse_dma32_size_opt);
74void __init dma32_reserve_bootmem(void) 75void __init dma32_reserve_bootmem(void)
75{ 76{
76 unsigned long size, align; 77 unsigned long size, align;
77 if (end_pfn <= MAX_DMA32_PFN) 78 if (max_pfn <= MAX_DMA32_PFN)
78 return; 79 return;
79 80
81 /*
82 * see aperture_64.c allocate_aperture() for the reasoning
83 * behind using 512M as the goal
84 */
80 align = 64ULL<<20; 85 align = 64ULL<<20;
81 size = round_up(dma32_bootmem_size, align); 86 size = round_up(dma32_bootmem_size, align);
82 dma32_bootmem_ptr = __alloc_bootmem_nopanic(size, align, 87 dma32_bootmem_ptr = __alloc_bootmem_nopanic(size, align,
83 __pa(MAX_DMA_ADDRESS)); 88 512ULL<<20);
84 if (dma32_bootmem_ptr) 89 if (dma32_bootmem_ptr)
85 dma32_bootmem_size = size; 90 dma32_bootmem_size = size;
86 else 91 else
@@ -88,17 +93,14 @@ void __init dma32_reserve_bootmem(void)
88} 93}
89static void __init dma32_free_bootmem(void) 94static void __init dma32_free_bootmem(void)
90{ 95{
91 int node;
92 96
93 if (end_pfn <= MAX_DMA32_PFN) 97 if (max_pfn <= MAX_DMA32_PFN)
94 return; 98 return;
95 99
96 if (!dma32_bootmem_ptr) 100 if (!dma32_bootmem_ptr)
97 return; 101 return;
98 102
99 for_each_online_node(node) 103 free_bootmem(__pa(dma32_bootmem_ptr), dma32_bootmem_size);
100 free_bootmem_node(NODE_DATA(node), __pa(dma32_bootmem_ptr),
101 dma32_bootmem_size);
102 104
103 dma32_bootmem_ptr = NULL; 105 dma32_bootmem_ptr = NULL;
104 dma32_bootmem_size = 0; 106 dma32_bootmem_size = 0;
@@ -122,6 +124,8 @@ void __init pci_iommu_alloc(void)
122 124
123 detect_intel_iommu(); 125 detect_intel_iommu();
124 126
127 amd_iommu_detect();
128
125#ifdef CONFIG_SWIOTLB 129#ifdef CONFIG_SWIOTLB
126 pci_swiotlb_init(); 130 pci_swiotlb_init();
127#endif 131#endif
@@ -357,7 +361,7 @@ int dma_supported(struct device *dev, u64 mask)
357EXPORT_SYMBOL(dma_supported); 361EXPORT_SYMBOL(dma_supported);
358 362
359/* Allocate DMA memory on node near device */ 363/* Allocate DMA memory on node near device */
360noinline struct page * 364static noinline struct page *
361dma_alloc_pages(struct device *dev, gfp_t gfp, unsigned order) 365dma_alloc_pages(struct device *dev, gfp_t gfp, unsigned order)
362{ 366{
363 int node; 367 int node;
@@ -502,6 +506,8 @@ static int __init pci_iommu_init(void)
502 506
503 intel_iommu_init(); 507 intel_iommu_init();
504 508
509 amd_iommu_init();
510
505#ifdef CONFIG_GART_IOMMU 511#ifdef CONFIG_GART_IOMMU
506 gart_iommu_init(); 512 gart_iommu_init();
507#endif 513#endif
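
dma32_reserve_bootmem() rounds the reservation up to a 64 MB boundary and now passes a 512 MB goal so the block lands above the region the aperture code wants (see the comment added above). round_up() for a power-of-two alignment is the usual mask trick; a quick sketch with the values used here:

    #include <assert.h>

    /* Power-of-two round-up, as the kernel's round_up() computes it. */
    #define ROUND_UP(x, a)  (((x) + (a) - 1) & ~((a) - 1))

    int main(void)
    {
            unsigned long align = 64UL << 20;              /* 64 MB  */
            unsigned long goal  = 512UL << 20;             /* 512 MB */

            assert(ROUND_UP(65UL << 20, align) == 128UL << 20);
            assert(ROUND_UP(goal, align) == goal);         /* already aligned */
            return 0;
    }
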
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index aa8ec928caa8..d0d18db5d2a4 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -104,7 +104,6 @@ static unsigned long alloc_iommu(struct device *dev, int size)
104 size, base_index, boundary_size, 0); 104 size, base_index, boundary_size, 0);
105 } 105 }
106 if (offset != -1) { 106 if (offset != -1) {
107 set_bit_string(iommu_gart_bitmap, offset, size);
108 next_bit = offset+size; 107 next_bit = offset+size;
109 if (next_bit >= iommu_pages) { 108 if (next_bit >= iommu_pages) {
110 next_bit = 0; 109 next_bit = 0;
@@ -534,8 +533,8 @@ static __init unsigned read_aperture(struct pci_dev *dev, u32 *size)
534 unsigned aper_size = 0, aper_base_32, aper_order; 533 unsigned aper_size = 0, aper_base_32, aper_order;
535 u64 aper_base; 534 u64 aper_base;
536 535
537 pci_read_config_dword(dev, 0x94, &aper_base_32); 536 pci_read_config_dword(dev, AMD64_GARTAPERTUREBASE, &aper_base_32);
538 pci_read_config_dword(dev, 0x90, &aper_order); 537 pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &aper_order);
539 aper_order = (aper_order >> 1) & 7; 538 aper_order = (aper_order >> 1) & 7;
540 539
541 aper_base = aper_base_32 & 0x7fff; 540 aper_base = aper_base_32 & 0x7fff;
@@ -549,14 +548,63 @@ static __init unsigned read_aperture(struct pci_dev *dev, u32 *size)
549 return aper_base; 548 return aper_base;
550} 549}
551 550
551static void enable_gart_translations(void)
552{
553 int i;
554
555 for (i = 0; i < num_k8_northbridges; i++) {
556 struct pci_dev *dev = k8_northbridges[i];
557
558 enable_gart_translation(dev, __pa(agp_gatt_table));
559 }
560}
561
562/*
563 * If fix_up_north_bridges is set, the north bridges have to be fixed up on
564 * resume in the same way as they are handled in gart_iommu_hole_init().
565 */
566static bool fix_up_north_bridges;
567static u32 aperture_order;
568static u32 aperture_alloc;
569
570void set_up_gart_resume(u32 aper_order, u32 aper_alloc)
571{
572 fix_up_north_bridges = true;
573 aperture_order = aper_order;
574 aperture_alloc = aper_alloc;
575}
576
552static int gart_resume(struct sys_device *dev) 577static int gart_resume(struct sys_device *dev)
553{ 578{
579 printk(KERN_INFO "PCI-DMA: Resuming GART IOMMU\n");
580
581 if (fix_up_north_bridges) {
582 int i;
583
584 printk(KERN_INFO "PCI-DMA: Restoring GART aperture settings\n");
585
586 for (i = 0; i < num_k8_northbridges; i++) {
587 struct pci_dev *dev = k8_northbridges[i];
588
589 /*
590 * Don't enable translations just yet. That is the next
591 * step. Restore the pre-suspend aperture settings.
592 */
593 pci_write_config_dword(dev, AMD64_GARTAPERTURECTL,
594 aperture_order << 1);
595 pci_write_config_dword(dev, AMD64_GARTAPERTUREBASE,
596 aperture_alloc >> 25);
597 }
598 }
599
600 enable_gart_translations();
601
554 return 0; 602 return 0;
555} 603}
556 604
557static int gart_suspend(struct sys_device *dev, pm_message_t state) 605static int gart_suspend(struct sys_device *dev, pm_message_t state)
558{ 606{
559 return -EINVAL; 607 return 0;
560} 608}
561 609
562static struct sysdev_class gart_sysdev_class = { 610static struct sysdev_class gart_sysdev_class = {
@@ -614,27 +662,14 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
614 memset(gatt, 0, gatt_size); 662 memset(gatt, 0, gatt_size);
615 agp_gatt_table = gatt; 663 agp_gatt_table = gatt;
616 664
617 for (i = 0; i < num_k8_northbridges; i++) { 665 enable_gart_translations();
618 u32 gatt_reg;
619 u32 ctl;
620
621 dev = k8_northbridges[i];
622 gatt_reg = __pa(gatt) >> 12;
623 gatt_reg <<= 4;
624 pci_write_config_dword(dev, 0x98, gatt_reg);
625 pci_read_config_dword(dev, 0x90, &ctl);
626
627 ctl |= 1;
628 ctl &= ~((1<<4) | (1<<5));
629
630 pci_write_config_dword(dev, 0x90, ctl);
631 }
632 666
633 error = sysdev_class_register(&gart_sysdev_class); 667 error = sysdev_class_register(&gart_sysdev_class);
634 if (!error) 668 if (!error)
635 error = sysdev_register(&device_gart); 669 error = sysdev_register(&device_gart);
636 if (error) 670 if (error)
637 panic("Could not register gart_sysdev -- would corrupt data on next suspend"); 671 panic("Could not register gart_sysdev -- would corrupt data on next suspend");
672
638 flush_gart(); 673 flush_gart();
639 674
640 printk(KERN_INFO "PCI-DMA: aperture base @ %x size %u KB\n", 675 printk(KERN_INFO "PCI-DMA: aperture base @ %x size %u KB\n",
@@ -677,11 +712,11 @@ void gart_iommu_shutdown(void)
677 u32 ctl; 712 u32 ctl;
678 713
679 dev = k8_northbridges[i]; 714 dev = k8_northbridges[i];
680 pci_read_config_dword(dev, 0x90, &ctl); 715 pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &ctl);
681 716
682 ctl &= ~1; 717 ctl &= ~GARTEN;
683 718
684 pci_write_config_dword(dev, 0x90, ctl); 719 pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, ctl);
685 } 720 }
686} 721}
687 722
@@ -716,10 +751,10 @@ void __init gart_iommu_init(void)
716 return; 751 return;
717 752
718 if (no_iommu || 753 if (no_iommu ||
719 (!force_iommu && end_pfn <= MAX_DMA32_PFN) || 754 (!force_iommu && max_pfn <= MAX_DMA32_PFN) ||
720 !gart_iommu_aperture || 755 !gart_iommu_aperture ||
721 (no_agp && init_k8_gatt(&info) < 0)) { 756 (no_agp && init_k8_gatt(&info) < 0)) {
722 if (end_pfn > MAX_DMA32_PFN) { 757 if (max_pfn > MAX_DMA32_PFN) {
723 printk(KERN_WARNING "More than 4GB of memory " 758 printk(KERN_WARNING "More than 4GB of memory "
724 "but GART IOMMU not available.\n" 759 "but GART IOMMU not available.\n"
725 KERN_WARNING "falling back to iommu=soft.\n"); 760 KERN_WARNING "falling back to iommu=soft.\n");
@@ -788,10 +823,10 @@ void __init gart_iommu_init(void)
788 wbinvd(); 823 wbinvd();
789 824
790 /* 825 /*
791 * Try to workaround a bug (thanks to BenH) 826 * Try to workaround a bug (thanks to BenH):
792 * Set unmapped entries to a scratch page instead of 0. 827 * Set unmapped entries to a scratch page instead of 0.
793 * Any prefetches that hit unmapped entries won't get a bus abort 828 * Any prefetches that hit unmapped entries won't get a bus abort
794 * then. 829 * then. (P2P bridge may be prefetching on DMA reads).
795 */ 830 */
796 scratch = get_zeroed_page(GFP_KERNEL); 831 scratch = get_zeroed_page(GFP_KERNEL);
797 if (!scratch) 832 if (!scratch)
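
The resume path above re-writes two northbridge config registers whose encodings explain the shifts: the control register keeps the aperture order in bits 3:1, with the aperture size being (32 MB << order), hence aperture_order << 1; the base register counts 32 MB units, hence aperture_alloc >> 25. The encode/decode pair as a sketch (helper names are illustrative; the bit positions follow read_aperture() above):

    #include <stdint.h>

    /* Order lives in bits 3:1 of AMD64_GARTAPERTURECTL. */
    static uint32_t encode_aper_ctl(unsigned int order)
    {
            return (uint32_t)order << 1;
    }

    /* AMD64_GARTAPERTUREBASE counts 32 MB (1 << 25 byte) units. */
    static uint32_t encode_aper_base(uint64_t phys_base)
    {
            return (uint32_t)(phys_base >> 25);
    }

    static uint64_t decode_aper_size(uint32_t ctl)
    {
            unsigned int order = (ctl >> 1) & 7;   /* as read_aperture() */

            return (32ULL << 20) << order;
    }
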
diff --git a/arch/x86/kernel/pci-swiotlb_64.c b/arch/x86/kernel/pci-swiotlb_64.c
index 490da7f4b8d0..82299cd1d04d 100644
--- a/arch/x86/kernel/pci-swiotlb_64.c
+++ b/arch/x86/kernel/pci-swiotlb_64.c
@@ -38,7 +38,7 @@ const struct dma_mapping_ops swiotlb_dma_ops = {
38void __init pci_swiotlb_init(void) 38void __init pci_swiotlb_init(void)
39{ 39{
40 /* don't initialize swiotlb if iommu=off (no_iommu=1) */ 40 /* don't initialize swiotlb if iommu=off (no_iommu=1) */
41 if (!iommu_detected && !no_iommu && end_pfn > MAX_DMA32_PFN) 41 if (!iommu_detected && !no_iommu && max_pfn > MAX_DMA32_PFN)
42 swiotlb = 1; 42 swiotlb = 1;
43 if (swiotlb_force) 43 if (swiotlb_force)
44 swiotlb = 1; 44 swiotlb = 1;
diff --git a/arch/x86/kernel/probe_roms_32.c b/arch/x86/kernel/probe_roms_32.c
new file mode 100644
index 000000000000..675a48c404a5
--- /dev/null
+++ b/arch/x86/kernel/probe_roms_32.c
@@ -0,0 +1,166 @@
1#include <linux/sched.h>
2#include <linux/mm.h>
3#include <linux/uaccess.h>
4#include <linux/mmzone.h>
5#include <linux/ioport.h>
6#include <linux/seq_file.h>
7#include <linux/console.h>
8#include <linux/init.h>
9#include <linux/edd.h>
10#include <linux/dmi.h>
11#include <linux/pfn.h>
12#include <linux/pci.h>
13#include <asm/pci-direct.h>
14
15
16#include <asm/e820.h>
17#include <asm/mmzone.h>
18#include <asm/setup.h>
19#include <asm/sections.h>
20#include <asm/io.h>
21#include <setup_arch.h>
22
23static struct resource system_rom_resource = {
24 .name = "System ROM",
25 .start = 0xf0000,
26 .end = 0xfffff,
27 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
28};
29
30static struct resource extension_rom_resource = {
31 .name = "Extension ROM",
32 .start = 0xe0000,
33 .end = 0xeffff,
34 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
35};
36
37static struct resource adapter_rom_resources[] = { {
38 .name = "Adapter ROM",
39 .start = 0xc8000,
40 .end = 0,
41 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
42}, {
43 .name = "Adapter ROM",
44 .start = 0,
45 .end = 0,
46 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
47}, {
48 .name = "Adapter ROM",
49 .start = 0,
50 .end = 0,
51 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
52}, {
53 .name = "Adapter ROM",
54 .start = 0,
55 .end = 0,
56 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
57}, {
58 .name = "Adapter ROM",
59 .start = 0,
60 .end = 0,
61 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
62}, {
63 .name = "Adapter ROM",
64 .start = 0,
65 .end = 0,
66 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
67} };
68
69static struct resource video_rom_resource = {
70 .name = "Video ROM",
71 .start = 0xc0000,
72 .end = 0xc7fff,
73 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
74};
75
76#define ROMSIGNATURE 0xaa55
77
78static int __init romsignature(const unsigned char *rom)
79{
80 const unsigned short * const ptr = (const unsigned short *)rom;
81 unsigned short sig;
82
83 return probe_kernel_address(ptr, sig) == 0 && sig == ROMSIGNATURE;
84}
85
86static int __init romchecksum(const unsigned char *rom, unsigned long length)
87{
88 unsigned char sum, c;
89
90 for (sum = 0; length && probe_kernel_address(rom++, c) == 0; length--)
91 sum += c;
92 return !length && !sum;
93}
94
95void __init probe_roms(void)
96{
97 const unsigned char *rom;
98 unsigned long start, length, upper;
99 unsigned char c;
100 int i;
101
102 /* video rom */
103 upper = adapter_rom_resources[0].start;
104 for (start = video_rom_resource.start; start < upper; start += 2048) {
105 rom = isa_bus_to_virt(start);
106 if (!romsignature(rom))
107 continue;
108
109 video_rom_resource.start = start;
110
111 if (probe_kernel_address(rom + 2, c) != 0)
112 continue;
113
114 /* 0 < length <= 0x7f * 512, historically */
115 length = c * 512;
116
117 /* if checksum okay, trust length byte */
118 if (length && romchecksum(rom, length))
119 video_rom_resource.end = start + length - 1;
120
121 request_resource(&iomem_resource, &video_rom_resource);
122 break;
123 }
124
125 start = (video_rom_resource.end + 1 + 2047) & ~2047UL;
126 if (start < upper)
127 start = upper;
128
129 /* system rom */
130 request_resource(&iomem_resource, &system_rom_resource);
131 upper = system_rom_resource.start;
132
133 /* check for extension rom (ignore length byte!) */
134 rom = isa_bus_to_virt(extension_rom_resource.start);
135 if (romsignature(rom)) {
136 length = extension_rom_resource.end - extension_rom_resource.start + 1;
137 if (romchecksum(rom, length)) {
138 request_resource(&iomem_resource, &extension_rom_resource);
139 upper = extension_rom_resource.start;
140 }
141 }
142
143 /* check for adapter roms on 2k boundaries */
144 for (i = 0; i < ARRAY_SIZE(adapter_rom_resources) && start < upper; start += 2048) {
145 rom = isa_bus_to_virt(start);
146 if (!romsignature(rom))
147 continue;
148
149 if (probe_kernel_address(rom + 2, c) != 0)
150 continue;
151
152 /* 0 < length <= 0x7f * 512, historically */
153 length = c * 512;
154
155 /* but accept any length that fits if checksum okay */
156 if (!length || start + length > upper || !romchecksum(rom, length))
157 continue;
158
159 adapter_rom_resources[i].start = start;
160 adapter_rom_resources[i].end = start + length - 1;
161 request_resource(&iomem_resource, &adapter_rom_resources[i]);
162
163 start = adapter_rom_resources[i++].end & ~2047UL;
164 }
165}
166
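
probe_roms() accepts an image only if all three header rules hold: bytes 0-1 carry the 0xAA55 signature (0x55 then 0xAA in memory), byte 2 gives the image length in 512-byte units, and every byte of the image must sum to zero modulo 256. The same checks as one standalone predicate (a sketch over an in-memory buffer rather than ISA space):

    #include <stddef.h>
    #include <stdint.h>

    static int rom_valid(const uint8_t *rom, size_t avail)
    {
            size_t len, i;
            uint8_t sum = 0;

            if (avail < 3 || rom[0] != 0x55 || rom[1] != 0xaa)
                    return 0;               /* no 0xAA55 signature */

            len = (size_t)rom[2] * 512;     /* length byte, 512-byte units */
            if (!len || len > avail)
                    return 0;

            for (i = 0; i < len; i++)       /* checksum covers whole image */
                    sum += rom[i];
            return sum == 0;
    }
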
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index ba370dc8685b..4061d63aabe7 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -6,6 +6,7 @@
6#include <linux/sched.h> 6#include <linux/sched.h>
7#include <linux/module.h> 7#include <linux/module.h>
8#include <linux/pm.h> 8#include <linux/pm.h>
9#include <linux/clockchips.h>
9 10
10struct kmem_cache *task_xstate_cachep; 11struct kmem_cache *task_xstate_cachep;
11 12
@@ -45,6 +46,76 @@ void arch_task_cache_init(void)
45 SLAB_PANIC, NULL); 46 SLAB_PANIC, NULL);
46} 47}
47 48
49/*
50 * Idle related variables and functions
51 */
52unsigned long boot_option_idle_override = 0;
53EXPORT_SYMBOL(boot_option_idle_override);
54
55/*
56 * Power management idle function, if any.
57 */
58void (*pm_idle)(void);
59EXPORT_SYMBOL(pm_idle);
60
61#ifdef CONFIG_X86_32
62/*
63 * This halt magic was a workaround for ancient floppy DMA
64 * wreckage. It should be safe to remove.
65 */
66static int hlt_counter;
67void disable_hlt(void)
68{
69 hlt_counter++;
70}
71EXPORT_SYMBOL(disable_hlt);
72
73void enable_hlt(void)
74{
75 hlt_counter--;
76}
77EXPORT_SYMBOL(enable_hlt);
78
79static inline int hlt_use_halt(void)
80{
81 return (!hlt_counter && boot_cpu_data.hlt_works_ok);
82}
83#else
84static inline int hlt_use_halt(void)
85{
86 return 1;
87}
88#endif
89
90/*
91 * We use this if we don't have any better
92 * idle routine..
93 */
94void default_idle(void)
95{
96 if (hlt_use_halt()) {
97 current_thread_info()->status &= ~TS_POLLING;
98 /*
99 * TS_POLLING-cleared state must be visible before we
100 * test NEED_RESCHED:
101 */
102 smp_mb();
103
104 if (!need_resched())
105 safe_halt(); /* enables interrupts racelessly */
106 else
107 local_irq_enable();
108 current_thread_info()->status |= TS_POLLING;
109 } else {
110 local_irq_enable();
111 /* loop is done by the caller */
112 cpu_relax();
113 }
114}
115#ifdef CONFIG_APM_MODULE
116EXPORT_SYMBOL(default_idle);
117#endif
118
48static void do_nothing(void *unused) 119static void do_nothing(void *unused)
49{ 120{
50} 121}
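
The barrier in default_idle() is the crux: the idle CPU must publish "not polling" before it samples need_resched, or a waker that sets need_resched and then skips the IPI (because it still saw the polling flag) can strand this CPU in hlt. A minimal C11 sketch of that ordering, with illustrative names rather than the kernel's flags:

#include <stdatomic.h>
#include <stdio.h>

static atomic_int polling = 1;
static atomic_int need_resched_flag;

static void idle_enter(void)
{
        /* clear the polling hint, then fence -- this is the smp_mb() */
        atomic_store_explicit(&polling, 0, memory_order_relaxed);
        atomic_thread_fence(memory_order_seq_cst);
        if (!atomic_load_explicit(&need_resched_flag, memory_order_relaxed))
                puts("would halt here (safe_halt)");
        atomic_store_explicit(&polling, 1, memory_order_relaxed);
}

int main(void)
{
        idle_enter();
        return 0;
}
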
@@ -122,44 +193,129 @@ static void poll_idle(void)
122 * 193 *
123 * idle=mwait overrides this decision and forces the usage of mwait. 194 * idle=mwait overrides this decision and forces the usage of mwait.
124 */ 195 */
196
197#define MWAIT_INFO 0x05
198#define MWAIT_ECX_EXTENDED_INFO 0x01
199#define MWAIT_EDX_C1 0xf0
200
125static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c) 201static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c)
126{ 202{
203 u32 eax, ebx, ecx, edx;
204
127 if (force_mwait) 205 if (force_mwait)
128 return 1; 206 return 1;
129 207
130 if (c->x86_vendor == X86_VENDOR_AMD) { 208 if (c->cpuid_level < MWAIT_INFO)
131 switch(c->x86) { 209 return 0;
132 case 0x10: 210
133 case 0x11: 211 cpuid(MWAIT_INFO, &eax, &ebx, &ecx, &edx);
134 return 0; 212 /* Check whether ECX flags valid extended MWAIT info in EDX */
135 } 213 if (!(ecx & MWAIT_ECX_EXTENDED_INFO))
136 } 214 return 1;
215
216 /*
217 * EDX enumerates the MONITOR/MWAIT extensions. Check whether
218 * C1 supports MWAIT.
219 */
220 return (edx & MWAIT_EDX_C1);
221}
222
223/*
224 * Check for AMD CPUs that potentially have C1E support
225 */
226static int __cpuinit check_c1e_idle(const struct cpuinfo_x86 *c)
227{
228 if (c->x86_vendor != X86_VENDOR_AMD)
229 return 0;
230
231 if (c->x86 < 0x0F)
232 return 0;
233
234 /* Family 0x0f models < rev F do not have C1E */
235 if (c->x86 == 0x0f && c->x86_model < 0x40)
236 return 0;
237
137 return 1; 238 return 1;
138} 239}
139 240
140void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) 241/*
242 * C1E aware idle routine. We check for C1E active in the interrupt
243 * pending message MSR. If we detect C1E, then we handle it the same
244 * way as C3 power states (local apic timer and TSC stop)
245 */
246static void c1e_idle(void)
141{ 247{
142 static int selected; 248 static cpumask_t c1e_mask = CPU_MASK_NONE;
249 static int c1e_detected;
143 250
144 if (selected) 251 if (need_resched())
145 return; 252 return;
253
254 if (!c1e_detected) {
255 u32 lo, hi;
256
257 rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi);
258 if (lo & K8_INTP_C1E_ACTIVE_MASK) {
259 c1e_detected = 1;
260 mark_tsc_unstable("TSC halt in C1E");
261 printk(KERN_INFO "System has C1E enabled\n");
262 }
263 }
264
265 if (c1e_detected) {
266 int cpu = smp_processor_id();
267
268 if (!cpu_isset(cpu, c1e_mask)) {
269 cpu_set(cpu, c1e_mask);
270 /*
271 * Force broadcast so ACPI cannot interfere. Needs
272 * to run with interrupts enabled as it uses
273 * smp_call_function.
274 */
275 local_irq_enable();
276 clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_FORCE,
277 &cpu);
278 printk(KERN_INFO "Switch to broadcast mode on CPU%d\n",
279 cpu);
280 local_irq_disable();
281 }
282 clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu);
283
284 default_idle();
285
286 /*
287 * The switch back from broadcast mode needs to be
288 * called with interrupts disabled.
289 */
290 local_irq_disable();
291 clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu);
292 local_irq_enable();
293 } else
294 default_idle();
295}
296
297void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
298{
146#ifdef CONFIG_X86_SMP 299#ifdef CONFIG_X86_SMP
147 if (pm_idle == poll_idle && smp_num_siblings > 1) { 300 if (pm_idle == poll_idle && smp_num_siblings > 1) {
148 printk(KERN_WARNING "WARNING: polling idle and HT enabled," 301 printk(KERN_WARNING "WARNING: polling idle and HT enabled,"
149 " performance may degrade.\n"); 302 " performance may degrade.\n");
150 } 303 }
151#endif 304#endif
305 if (pm_idle)
306 return;
307
152 if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) { 308 if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) {
153 /* 309 /*
154 * Skip, if setup has overridden idle.
155 * One CPU supports mwait => all CPUs support mwait 310 * One CPU supports mwait => all CPUs support mwait
156 */ 311 */
157 if (!pm_idle) { 312 printk(KERN_INFO "using mwait in idle threads.\n");
158 printk(KERN_INFO "using mwait in idle threads.\n"); 313 pm_idle = mwait_idle;
159 pm_idle = mwait_idle; 314 } else if (check_c1e_idle(c)) {
160 } 315 printk(KERN_INFO "using C1E aware idle routine\n");
161 } 316 pm_idle = c1e_idle;
162 selected = 1; 317 } else
318 pm_idle = default_idle;
163} 319}
164 320
165static int __init idle_setup(char *str) 321static int __init idle_setup(char *str)
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index e2db9ac5c61c..9a139f6c9df3 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -58,11 +58,6 @@
58 58
59asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); 59asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
60 60
61static int hlt_counter;
62
63unsigned long boot_option_idle_override = 0;
64EXPORT_SYMBOL(boot_option_idle_override);
65
66DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; 61DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
67EXPORT_PER_CPU_SYMBOL(current_task); 62EXPORT_PER_CPU_SYMBOL(current_task);
68 63
@@ -77,57 +72,24 @@ unsigned long thread_saved_pc(struct task_struct *tsk)
77 return ((unsigned long *)tsk->thread.sp)[3]; 72 return ((unsigned long *)tsk->thread.sp)[3];
78} 73}
79 74
80/* 75#ifdef CONFIG_HOTPLUG_CPU
81 * Powermanagement idle function, if any.. 76#include <asm/nmi.h>
82 */
83void (*pm_idle)(void);
84EXPORT_SYMBOL(pm_idle);
85 77
86void disable_hlt(void) 78static void cpu_exit_clear(void)
87{ 79{
88 hlt_counter++; 80 int cpu = raw_smp_processor_id();
89}
90 81
91EXPORT_SYMBOL(disable_hlt); 82 idle_task_exit();
92 83
93void enable_hlt(void) 84 cpu_uninit();
94{ 85 irq_ctx_exit(cpu);
95 hlt_counter--;
96}
97 86
98EXPORT_SYMBOL(enable_hlt); 87 cpu_clear(cpu, cpu_callout_map);
99 88 cpu_clear(cpu, cpu_callin_map);
100/*
101 * We use this if we don't have any better
102 * idle routine..
103 */
104void default_idle(void)
105{
106 if (!hlt_counter && boot_cpu_data.hlt_works_ok) {
107 current_thread_info()->status &= ~TS_POLLING;
108 /*
109 * TS_POLLING-cleared state must be visible before we
110 * test NEED_RESCHED:
111 */
112 smp_mb();
113 89
114 if (!need_resched()) 90 numa_remove_cpu(cpu);
115 safe_halt(); /* enables interrupts racelessly */
116 else
117 local_irq_enable();
118 current_thread_info()->status |= TS_POLLING;
119 } else {
120 local_irq_enable();
121 /* loop is done by the caller */
122 cpu_relax();
123 }
124} 91}
125#ifdef CONFIG_APM_MODULE
126EXPORT_SYMBOL(default_idle);
127#endif
128 92
129#ifdef CONFIG_HOTPLUG_CPU
130#include <asm/nmi.h>
131/* We don't actually take the CPU down, just spin without interrupts. */ 93/* We don't actually take the CPU down, just spin without interrupts. */
132static inline void play_dead(void) 94static inline void play_dead(void)
133{ 95{
@@ -168,24 +130,19 @@ void cpu_idle(void)
168 while (1) { 130 while (1) {
169 tick_nohz_stop_sched_tick(); 131 tick_nohz_stop_sched_tick();
170 while (!need_resched()) { 132 while (!need_resched()) {
171 void (*idle)(void);
172 133
173 check_pgt_cache(); 134 check_pgt_cache();
174 rmb(); 135 rmb();
175 idle = pm_idle;
176 136
177 if (rcu_pending(cpu)) 137 if (rcu_pending(cpu))
178 rcu_check_callbacks(cpu, 0); 138 rcu_check_callbacks(cpu, 0);
179 139
180 if (!idle)
181 idle = default_idle;
182
183 if (cpu_is_offline(cpu)) 140 if (cpu_is_offline(cpu))
184 play_dead(); 141 play_dead();
185 142
186 local_irq_disable(); 143 local_irq_disable();
187 __get_cpu_var(irq_stat).idle_timestamp = jiffies; 144 __get_cpu_var(irq_stat).idle_timestamp = jiffies;
188 idle(); 145 pm_idle();
189 } 146 }
190 tick_nohz_restart_sched_tick(); 147 tick_nohz_restart_sched_tick();
191 preempt_enable_no_resched(); 148 preempt_enable_no_resched();
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index c6eb5c91e5f6..db5eb963e4df 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -56,15 +56,6 @@ asmlinkage extern void ret_from_fork(void);
56 56
57unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED; 57unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;
58 58
59unsigned long boot_option_idle_override = 0;
60EXPORT_SYMBOL(boot_option_idle_override);
61
62/*
63 * Powermanagement idle function, if any..
64 */
65void (*pm_idle)(void);
66EXPORT_SYMBOL(pm_idle);
67
68static ATOMIC_NOTIFIER_HEAD(idle_notifier); 59static ATOMIC_NOTIFIER_HEAD(idle_notifier);
69 60
70void idle_notifier_register(struct notifier_block *n) 61void idle_notifier_register(struct notifier_block *n)
@@ -94,25 +85,6 @@ void exit_idle(void)
94 __exit_idle(); 85 __exit_idle();
95} 86}
96 87
97/*
98 * We use this if we don't have any better
99 * idle routine..
100 */
101void default_idle(void)
102{
103 current_thread_info()->status &= ~TS_POLLING;
104 /*
105 * TS_POLLING-cleared state must be visible before we
106 * test NEED_RESCHED:
107 */
108 smp_mb();
109 if (!need_resched())
110 safe_halt(); /* enables interrupts racelessly */
111 else
112 local_irq_enable();
113 current_thread_info()->status |= TS_POLLING;
114}
115
116#ifdef CONFIG_HOTPLUG_CPU 88#ifdef CONFIG_HOTPLUG_CPU
117DECLARE_PER_CPU(int, cpu_state); 89DECLARE_PER_CPU(int, cpu_state);
118 90
@@ -150,12 +122,9 @@ void cpu_idle(void)
150 while (1) { 122 while (1) {
151 tick_nohz_stop_sched_tick(); 123 tick_nohz_stop_sched_tick();
152 while (!need_resched()) { 124 while (!need_resched()) {
153 void (*idle)(void);
154 125
155 rmb(); 126 rmb();
156 idle = pm_idle; 127
157 if (!idle)
158 idle = default_idle;
159 if (cpu_is_offline(smp_processor_id())) 128 if (cpu_is_offline(smp_processor_id()))
160 play_dead(); 129 play_dead();
161 /* 130 /*
@@ -165,7 +134,7 @@ void cpu_idle(void)
165 */ 134 */
166 local_irq_disable(); 135 local_irq_disable();
167 enter_idle(); 136 enter_idle();
168 idle(); 137 pm_idle();
169 /* In many cases the interrupt that ended idle 138 /* In many cases the interrupt that ended idle
170 has already called exit_idle. But some idle 139 has already called exit_idle. But some idle
171 loops can be woken up without interrupt. */ 140 loops can be woken up without interrupt. */
@@ -366,10 +335,10 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
366 p->thread.fs = me->thread.fs; 335 p->thread.fs = me->thread.fs;
367 p->thread.gs = me->thread.gs; 336 p->thread.gs = me->thread.gs;
368 337
369 asm("mov %%gs,%0" : "=m" (p->thread.gsindex)); 338 savesegment(gs, p->thread.gsindex);
370 asm("mov %%fs,%0" : "=m" (p->thread.fsindex)); 339 savesegment(fs, p->thread.fsindex);
371 asm("mov %%es,%0" : "=m" (p->thread.es)); 340 savesegment(es, p->thread.es);
372 asm("mov %%ds,%0" : "=m" (p->thread.ds)); 341 savesegment(ds, p->thread.ds);
373 342
374 if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) { 343 if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
375 p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); 344 p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
@@ -408,7 +377,9 @@ out:
408void 377void
409start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) 378start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
410{ 379{
411 asm volatile("movl %0, %%fs; movl %0, %%es; movl %0, %%ds" :: "r"(0)); 380 loadsegment(fs, 0);
381 loadsegment(es, 0);
382 loadsegment(ds, 0);
412 load_gs_index(0); 383 load_gs_index(0);
413 regs->ip = new_ip; 384 regs->ip = new_ip;
414 regs->sp = new_sp; 385 regs->sp = new_sp;
@@ -567,6 +538,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
567 *next = &next_p->thread; 538 *next = &next_p->thread;
568 int cpu = smp_processor_id(); 539 int cpu = smp_processor_id();
569 struct tss_struct *tss = &per_cpu(init_tss, cpu); 540 struct tss_struct *tss = &per_cpu(init_tss, cpu);
541 unsigned fsindex, gsindex;
570 542
571 /* we're going to use this soon, after a few expensive things */ 543 /* we're going to use this soon, after a few expensive things */
572 if (next_p->fpu_counter>5) 544 if (next_p->fpu_counter>5)
@@ -581,22 +553,38 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
581 * Switch DS and ES. 553 * Switch DS and ES.
582 * This won't pick up thread selector changes, but I guess that is ok. 554 * This won't pick up thread selector changes, but I guess that is ok.
583 */ 555 */
584 asm volatile("mov %%es,%0" : "=m" (prev->es)); 556 savesegment(es, prev->es);
585 if (unlikely(next->es | prev->es)) 557 if (unlikely(next->es | prev->es))
586 loadsegment(es, next->es); 558 loadsegment(es, next->es);
587 559
588 asm volatile ("mov %%ds,%0" : "=m" (prev->ds)); 560 savesegment(ds, prev->ds);
589 if (unlikely(next->ds | prev->ds)) 561 if (unlikely(next->ds | prev->ds))
590 loadsegment(ds, next->ds); 562 loadsegment(ds, next->ds);
591 563
564
565 /* We must save %fs and %gs before load_TLS() because
566 * %fs and %gs may be cleared by load_TLS().
567 *
568 * (e.g. xen_load_tls())
569 */
570 savesegment(fs, fsindex);
571 savesegment(gs, gsindex);
572
592 load_TLS(next, cpu); 573 load_TLS(next, cpu);
593 574
575 /*
576 * Leave lazy mode, flushing any hypercalls made here.
577 * This must be done before restoring TLS segments so
578 * the GDT and LDT are properly updated, and must be
579 * done before math_state_restore, so the TS bit is up
580 * to date.
581 */
582 arch_leave_lazy_cpu_mode();
583
594 /* 584 /*
595 * Switch FS and GS. 585 * Switch FS and GS.
596 */ 586 */
597 { 587 {
598 unsigned fsindex;
599 asm volatile("movl %%fs,%0" : "=r" (fsindex));
600 /* segment register != 0 always requires a reload. 588 /* segment register != 0 always requires a reload.
601 also reload when it has changed. 589 also reload when it has changed.
602 when prev process used 64bit base always reload 590 when prev process used 64bit base always reload
@@ -614,10 +602,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
614 if (next->fs) 602 if (next->fs)
615 wrmsrl(MSR_FS_BASE, next->fs); 603 wrmsrl(MSR_FS_BASE, next->fs);
616 prev->fsindex = fsindex; 604 prev->fsindex = fsindex;
617 } 605
618 {
619 unsigned gsindex;
620 asm volatile("movl %%gs,%0" : "=r" (gsindex));
621 if (unlikely(gsindex | next->gsindex | prev->gs)) { 606 if (unlikely(gsindex | next->gsindex | prev->gs)) {
622 load_gs_index(next->gsindex); 607 load_gs_index(next->gsindex);
623 if (gsindex) 608 if (gsindex)
@@ -798,7 +783,7 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
798 set_32bit_tls(task, FS_TLS, addr); 783 set_32bit_tls(task, FS_TLS, addr);
799 if (doit) { 784 if (doit) {
800 load_TLS(&task->thread, cpu); 785 load_TLS(&task->thread, cpu);
801 asm volatile("movl %0,%%fs" :: "r"(FS_TLS_SEL)); 786 loadsegment(fs, FS_TLS_SEL);
802 } 787 }
803 task->thread.fsindex = FS_TLS_SEL; 788 task->thread.fsindex = FS_TLS_SEL;
804 task->thread.fs = 0; 789 task->thread.fs = 0;
@@ -808,7 +793,7 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
808 if (doit) { 793 if (doit) {
809 /* set the selector to 0 to not confuse 794 /* set the selector to 0 to not confuse
810 __switch_to */ 795 __switch_to */
811 asm volatile("movl %0,%%fs" :: "r" (0)); 796 loadsegment(fs, 0);
812 ret = checking_wrmsrl(MSR_FS_BASE, addr); 797 ret = checking_wrmsrl(MSR_FS_BASE, addr);
813 } 798 }
814 } 799 }
@@ -831,7 +816,7 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
831 if (task->thread.gsindex == GS_TLS_SEL) 816 if (task->thread.gsindex == GS_TLS_SEL)
832 base = read_32bit_tls(task, GS_TLS); 817 base = read_32bit_tls(task, GS_TLS);
833 else if (doit) { 818 else if (doit) {
834 asm("movl %%gs,%0" : "=r" (gsindex)); 819 savesegment(gs, gsindex);
835 if (gsindex) 820 if (gsindex)
836 rdmsrl(MSR_KERNEL_GS_BASE, base); 821 rdmsrl(MSR_KERNEL_GS_BASE, base);
837 else 822 else
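
Most of this hunk replaces open-coded "mov %%fs,%0"-style asm with the savesegment()/loadsegment() helpers, which centralize the constraints and (on the load side) the fault fixup for invalid selectors. A simplified sketch of what the save side boils down to; segment reads are unprivileged, so this runs in user space as-is:

#include <stdio.h>

#define savesegment(seg, value) \
        asm volatile("mov %%" #seg ",%0" : "=r" (value) : : "memory")

int main(void)
{
        unsigned int fsindex, gsindex;

        savesegment(fs, fsindex);
        savesegment(gs, gsindex);
        printf("fs=%#x gs=%#x\n", fsindex, gsindex);
        return 0;
}
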
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index a7835f282936..77040b6070e1 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -943,13 +943,13 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
943 return copy_regset_to_user(child, &user_x86_32_view, 943 return copy_regset_to_user(child, &user_x86_32_view,
944 REGSET_XFP, 944 REGSET_XFP,
945 0, sizeof(struct user_fxsr_struct), 945 0, sizeof(struct user_fxsr_struct),
946 datap); 946 datap) ? -EIO : 0;
947 947
948 case PTRACE_SETFPXREGS: /* Set the child extended FPU state. */ 948 case PTRACE_SETFPXREGS: /* Set the child extended FPU state. */
949 return copy_regset_from_user(child, &user_x86_32_view, 949 return copy_regset_from_user(child, &user_x86_32_view,
950 REGSET_XFP, 950 REGSET_XFP,
951 0, sizeof(struct user_fxsr_struct), 951 0, sizeof(struct user_fxsr_struct),
952 datap); 952 datap) ? -EIO : 0;
953#endif 953#endif
954 954
955#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION 955#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
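
The "? -EIO : 0" suffix restores the historical ptrace ABI for these requests: the regset helpers report failure with their own error codes, while PTRACE_GETFPXREGS/SETFPXREGS callers expect 0 or -EIO. The pattern in isolation, with fetch_regs() as a hypothetical stand-in for copy_regset_to_user():

#include <errno.h>
#include <string.h>

static int fetch_regs(void *dst, const void *src, size_t n)
{
        if (!dst || !src)
                return -EFAULT;
        memcpy(dst, src, n);
        return 0;
}

static long getfpxregs(void *dst, const void *src, size_t n)
{
        /* collapse any nonzero result to the ABI's -EIO */
        return fetch_regs(dst, src, n) ? -EIO : 0;
}

int main(void)
{
        return getfpxregs(0, 0, 0) == -EIO ? 0 : 1;
}
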
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index d89a648fe710..79bdcd11c66e 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -65,6 +65,7 @@ static enum {
65 ICH_FORCE_HPET_RESUME, 65 ICH_FORCE_HPET_RESUME,
66 VT8237_FORCE_HPET_RESUME, 66 VT8237_FORCE_HPET_RESUME,
67 NVIDIA_FORCE_HPET_RESUME, 67 NVIDIA_FORCE_HPET_RESUME,
68 ATI_FORCE_HPET_RESUME,
68} force_hpet_resume_type; 69} force_hpet_resume_type;
69 70
70static void __iomem *rcba_base; 71static void __iomem *rcba_base;
@@ -158,6 +159,8 @@ static void ich_force_enable_hpet(struct pci_dev *dev)
158 159
159DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ESB2_0, 160DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ESB2_0,
160 ich_force_enable_hpet); 161 ich_force_enable_hpet);
162DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH6_0,
163 ich_force_enable_hpet);
161DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH6_1, 164DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH6_1,
162 ich_force_enable_hpet); 165 ich_force_enable_hpet);
163DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH7_0, 166DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH7_0,
@@ -174,6 +177,12 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH9_7,
174 177
175static struct pci_dev *cached_dev; 178static struct pci_dev *cached_dev;
176 179
180static void hpet_print_force_info(void)
181{
182 printk(KERN_INFO "HPET not enabled in BIOS. "
183 "You might try hpet=force boot option\n");
184}
185
177static void old_ich_force_hpet_resume(void) 186static void old_ich_force_hpet_resume(void)
178{ 187{
179 u32 val; 188 u32 val;
@@ -253,6 +262,8 @@ static void old_ich_force_enable_hpet_user(struct pci_dev *dev)
253{ 262{
254 if (hpet_force_user) 263 if (hpet_force_user)
255 old_ich_force_enable_hpet(dev); 264 old_ich_force_enable_hpet(dev);
265 else
266 hpet_print_force_info();
256} 267}
257 268
258DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_0, 269DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_0,
@@ -290,8 +301,13 @@ static void vt8237_force_enable_hpet(struct pci_dev *dev)
290{ 301{
291 u32 uninitialized_var(val); 302 u32 uninitialized_var(val);
292 303
293 if (!hpet_force_user || hpet_address || force_hpet_address) 304 if (hpet_address || force_hpet_address)
305 return;
306
307 if (!hpet_force_user) {
308 hpet_print_force_info();
294 return; 309 return;
310 }
295 311
296 pci_read_config_dword(dev, 0x68, &val); 312 pci_read_config_dword(dev, 0x68, &val);
297 /* 313 /*
@@ -330,6 +346,36 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8235,
330DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8237, 346DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8237,
331 vt8237_force_enable_hpet); 347 vt8237_force_enable_hpet);
332 348
349static void ati_force_hpet_resume(void)
350{
351 pci_write_config_dword(cached_dev, 0x14, 0xfed00000);
352 printk(KERN_DEBUG "Force enabled HPET at resume\n");
353}
354
355static void ati_force_enable_hpet(struct pci_dev *dev)
356{
357 u32 uninitialized_var(val);
358
359 if (hpet_address || force_hpet_address)
360 return;
361
362 if (!hpet_force_user) {
363 hpet_print_force_info();
364 return;
365 }
366
367 pci_write_config_dword(dev, 0x14, 0xfed00000);
368 pci_read_config_dword(dev, 0x14, &val);
369 force_hpet_address = val;
370 force_hpet_resume_type = ATI_FORCE_HPET_RESUME;
371 dev_printk(KERN_DEBUG, &dev->dev, "Force enabled HPET at 0x%lx\n",
372 force_hpet_address);
373 cached_dev = dev;
374 return;
375}
376DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_IXP400_SMBUS,
377 ati_force_enable_hpet);
378
333/* 379/*
334 * Undocumented chipset feature taken from LinuxBIOS. 380 * Undocumented chipset feature taken from LinuxBIOS.
335 */ 381 */
@@ -343,8 +389,13 @@ static void nvidia_force_enable_hpet(struct pci_dev *dev)
343{ 389{
344 u32 uninitialized_var(val); 390 u32 uninitialized_var(val);
345 391
346 if (!hpet_force_user || hpet_address || force_hpet_address) 392 if (hpet_address || force_hpet_address)
393 return;
394
395 if (!hpet_force_user) {
396 hpet_print_force_info();
347 return; 397 return;
398 }
348 399
349 pci_write_config_dword(dev, 0x44, 0xfed00001); 400 pci_write_config_dword(dev, 0x44, 0xfed00001);
350 pci_read_config_dword(dev, 0x44, &val); 401 pci_read_config_dword(dev, 0x44, &val);
@@ -397,6 +448,9 @@ void force_hpet_resume(void)
397 case NVIDIA_FORCE_HPET_RESUME: 448 case NVIDIA_FORCE_HPET_RESUME:
398 nvidia_force_hpet_resume(); 449 nvidia_force_hpet_resume();
399 return; 450 return;
451 case ATI_FORCE_HPET_RESUME:
452 ati_force_hpet_resume();
453 return;
400 default: 454 default:
401 break; 455 break;
402 } 456 }
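
The ATI quirk programs register 0x14 (where BAR1 sits in a standard type-0 header) with 0xfed00000, the canonical HPET base, and then reads it back into force_hpet_address. Decoding such a 32-bit memory BAR value makes the read-back concrete; this is a sketch of the PCI-spec BAR layout, not kernel code:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        uint32_t bar = 0xfed00000;      /* value the quirk writes at 0x14 */

        if (bar & 1)
                printf("I/O BAR at %#x\n", bar & ~3u);
        else
                printf("memory BAR at %#x, type %u, %sprefetchable\n",
                       bar & ~15u, (bar >> 1) & 3,
                       (bar & 8) ? "" : "non-");
        return 0;
}
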
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index f6be7d5f82f8..f8a62160e151 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -27,7 +27,7 @@
27void (*pm_power_off)(void); 27void (*pm_power_off)(void);
28EXPORT_SYMBOL(pm_power_off); 28EXPORT_SYMBOL(pm_power_off);
29 29
30static long no_idt[3]; 30static const struct desc_ptr no_idt = {};
31static int reboot_mode; 31static int reboot_mode;
32enum reboot_type reboot_type = BOOT_KBD; 32enum reboot_type reboot_type = BOOT_KBD;
33int reboot_force; 33int reboot_force;
@@ -201,15 +201,15 @@ core_initcall(reboot_init);
201 controller to pulse the CPU reset line, which is more thorough, but 201 controller to pulse the CPU reset line, which is more thorough, but
202 doesn't work with at least one type of 486 motherboard. It is easy 202 doesn't work with at least one type of 486 motherboard. It is easy
203 to stop this code working; hence the copious comments. */ 203 to stop this code working; hence the copious comments. */
204static unsigned long long 204static const unsigned long long
205real_mode_gdt_entries [3] = 205real_mode_gdt_entries [3] =
206{ 206{
207 0x0000000000000000ULL, /* Null descriptor */ 207 0x0000000000000000ULL, /* Null descriptor */
208 0x00009a000000ffffULL, /* 16-bit real-mode 64k code at 0x00000000 */ 208 0x00009b000000ffffULL, /* 16-bit real-mode 64k code at 0x00000000 */
209 0x000092000100ffffULL /* 16-bit real-mode 64k data at 0x00000100 */ 209 0x000093000100ffffULL /* 16-bit real-mode 64k data at 0x00000100 */
210}; 210};
211 211
212static struct desc_ptr 212static const struct desc_ptr
213real_mode_gdt = { sizeof (real_mode_gdt_entries) - 1, (long)real_mode_gdt_entries }, 213real_mode_gdt = { sizeof (real_mode_gdt_entries) - 1, (long)real_mode_gdt_entries },
214real_mode_idt = { 0x3ff, 0 }; 214real_mode_idt = { 0x3ff, 0 };
215 215
@@ -231,7 +231,7 @@ real_mode_idt = { 0x3ff, 0 };
231 231
232 More could be done here to set up the registers as if a CPU reset had 232 More could be done here to set up the registers as if a CPU reset had
233 occurred; hopefully real BIOSs don't assume much. */ 233 occurred; hopefully real BIOSs don't assume much. */
234static unsigned char real_mode_switch [] = 234static const unsigned char real_mode_switch [] =
235{ 235{
236 0x66, 0x0f, 0x20, 0xc0, /* movl %cr0,%eax */ 236 0x66, 0x0f, 0x20, 0xc0, /* movl %cr0,%eax */
237 0x66, 0x83, 0xe0, 0x11, /* andl $0x00000011,%eax */ 237 0x66, 0x83, 0xe0, 0x11, /* andl $0x00000011,%eax */
@@ -245,7 +245,7 @@ static unsigned char real_mode_switch [] =
245 0x24, 0x10, /* f: andb $0x10,al */ 245 0x24, 0x10, /* f: andb $0x10,al */
246 0x66, 0x0f, 0x22, 0xc0 /* movl %eax,%cr0 */ 246 0x66, 0x0f, 0x22, 0xc0 /* movl %eax,%cr0 */
247}; 247};
248static unsigned char jump_to_bios [] = 248static const unsigned char jump_to_bios [] =
249{ 249{
250 0xea, 0x00, 0x00, 0xff, 0xff /* ljmp $0xffff,$0x0000 */ 250 0xea, 0x00, 0x00, 0xff, 0xff /* ljmp $0xffff,$0x0000 */
251}; 251};
@@ -255,7 +255,7 @@ static unsigned char jump_to_bios [] =
255 * specified by the code and length parameters. 255 * specified by the code and length parameters.
256 * We assume that length will always be less than 100! 256 * We assume that length will always be less than 100!
257 */ 257 */
258void machine_real_restart(unsigned char *code, int length) 258void machine_real_restart(const unsigned char *code, int length)
259{ 259{
260 local_irq_disable(); 260 local_irq_disable();
261 261
@@ -368,7 +368,7 @@ static void native_machine_emergency_restart(void)
368 } 368 }
369 369
370 case BOOT_TRIPLE: 370 case BOOT_TRIPLE:
371 load_idt((const struct desc_ptr *)&no_idt); 371 load_idt(&no_idt);
372 __asm__ __volatile__("int3"); 372 __asm__ __volatile__("int3");
373 373
374 reboot_type = BOOT_KBD; 374 reboot_type = BOOT_KBD;
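
The GDT byte changes (0x9a to 0x9b, 0x92 to 0x93) preset the descriptors' "accessed" bit, presumably so the CPU never needs to write it back into what is now const, potentially read-only, data. Decoding one descriptor makes the fields concrete:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        uint64_t d = 0x00009b000000ffffULL;
        uint32_t limit  = (d & 0xffff) | ((d >> 32) & 0xf0000);
        uint32_t base   = (uint32_t)((d >> 16) & 0xffffff) |
                          (uint32_t)((d >> 56) << 24);
        uint8_t  access = (d >> 40) & 0xff;     /* 0x9b: present, code, accessed */

        printf("base=%#x limit=%#x access=%#x accessed=%d\n",
               base, limit, access, access & 1);
        return 0;
}
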
diff --git a/arch/x86/kernel/reboot_fixups_32.c b/arch/x86/kernel/reboot_fixups_32.c
index dec0b5ec25c2..61a837743fe5 100644
--- a/arch/x86/kernel/reboot_fixups_32.c
+++ b/arch/x86/kernel/reboot_fixups_32.c
@@ -49,7 +49,7 @@ struct device_fixup {
49 void (*reboot_fixup)(struct pci_dev *); 49 void (*reboot_fixup)(struct pci_dev *);
50}; 50};
51 51
52static struct device_fixup fixups_table[] = { 52static const struct device_fixup fixups_table[] = {
53{ PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY, cs5530a_warm_reset }, 53{ PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY, cs5530a_warm_reset },
54{ PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CS5536_ISA, cs5536_warm_reset }, 54{ PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CS5536_ISA, cs5536_warm_reset },
55{ PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SC1100_BRIDGE, cs5530a_warm_reset }, 55{ PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SC1100_BRIDGE, cs5530a_warm_reset },
@@ -64,7 +64,7 @@ static struct device_fixup fixups_table[] = {
64 */ 64 */
65void mach_reboot_fixups(void) 65void mach_reboot_fixups(void)
66{ 66{
67 struct device_fixup *cur; 67 const struct device_fixup *cur;
68 struct pci_dev *dev; 68 struct pci_dev *dev;
69 int i; 69 int i;
70 70
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 6f80b852a196..987b6fde3a99 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1,139 +1,881 @@
1#include <linux/kernel.h> 1/*
2 * Copyright (C) 1995 Linus Torvalds
3 *
4 * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
5 *
6 * Memory region support
7 * David Parsons <orc@pell.chi.il.us>, July-August 1999
8 *
9 * Added E820 sanitization routine (removes overlapping memory regions);
10 * Brian Moyle <bmoyle@mvista.com>, February 2001
11 *
12 * Moved CPU detection code to cpu/${cpu}.c
13 * Patrick Mochel <mochel@osdl.org>, March 2002
14 *
15 * Provisions for empty E820 memory regions (reported by certain BIOSes).
16 * Alex Achenbach <xela@slit.de>, December 2002.
17 *
18 */
19
20/*
21 * This file handles the architecture-dependent parts of initialization
22 */
23
24#include <linux/sched.h>
25#include <linux/mm.h>
26#include <linux/mmzone.h>
27#include <linux/screen_info.h>
28#include <linux/ioport.h>
29#include <linux/acpi.h>
30#include <linux/apm_bios.h>
31#include <linux/initrd.h>
32#include <linux/bootmem.h>
33#include <linux/seq_file.h>
34#include <linux/console.h>
35#include <linux/mca.h>
36#include <linux/root_dev.h>
37#include <linux/highmem.h>
2#include <linux/module.h> 38#include <linux/module.h>
39#include <linux/efi.h>
3#include <linux/init.h> 40#include <linux/init.h>
4#include <linux/bootmem.h> 41#include <linux/edd.h>
42#include <linux/iscsi_ibft.h>
43#include <linux/nodemask.h>
44#include <linux/kexec.h>
45#include <linux/dmi.h>
46#include <linux/pfn.h>
47#include <linux/pci.h>
48#include <asm/pci-direct.h>
49#include <linux/init_ohci1394_dma.h>
50#include <linux/kvm_para.h>
51
52#include <linux/errno.h>
53#include <linux/kernel.h>
54#include <linux/stddef.h>
55#include <linux/unistd.h>
56#include <linux/ptrace.h>
57#include <linux/slab.h>
58#include <linux/user.h>
59#include <linux/delay.h>
60#include <linux/highmem.h>
61
62#include <linux/kallsyms.h>
63#include <linux/edd.h>
64#include <linux/iscsi_ibft.h>
65#include <linux/kexec.h>
66#include <linux/cpufreq.h>
67#include <linux/dma-mapping.h>
68#include <linux/ctype.h>
69#include <linux/uaccess.h>
70
5#include <linux/percpu.h> 71#include <linux/percpu.h>
72#include <linux/crash_dump.h>
73
74#include <video/edid.h>
75
76#include <asm/mtrr.h>
77#include <asm/apic.h>
78#include <asm/e820.h>
79#include <asm/mpspec.h>
80#include <asm/setup.h>
81#include <asm/arch_hooks.h>
82#include <asm/efi.h>
83#include <asm/sections.h>
84#include <asm/dmi.h>
85#include <asm/io_apic.h>
86#include <asm/ist.h>
87#include <asm/vmi.h>
88#include <setup_arch.h>
89#include <asm/bios_ebda.h>
90#include <asm/cacheflush.h>
91#include <asm/processor.h>
92#include <asm/bugs.h>
93
94#include <asm/system.h>
95#include <asm/vsyscall.h>
6#include <asm/smp.h> 96#include <asm/smp.h>
97#include <asm/desc.h>
98#include <asm/dma.h>
99#include <asm/gart.h>
100#include <asm/mmu_context.h>
101#include <asm/proto.h>
102
103#include <mach_apic.h>
104#include <asm/paravirt.h>
105
7#include <asm/percpu.h> 106#include <asm/percpu.h>
8#include <asm/sections.h> 107#include <asm/sections.h>
9#include <asm/processor.h>
10#include <asm/setup.h>
11#include <asm/topology.h> 108#include <asm/topology.h>
12#include <asm/mpspec.h>
13#include <asm/apicdef.h> 109#include <asm/apicdef.h>
110#ifdef CONFIG_X86_64
111#include <asm/numa_64.h>
112#endif
14 113
15#ifdef CONFIG_X86_LOCAL_APIC 114#ifndef ARCH_SETUP
16unsigned int num_processors; 115#define ARCH_SETUP
17unsigned disabled_cpus __cpuinitdata; 116#endif
18/* Processor that is doing the boot up */
19unsigned int boot_cpu_physical_apicid = -1U;
20EXPORT_SYMBOL(boot_cpu_physical_apicid);
21 117
22DEFINE_PER_CPU(u16, x86_cpu_to_apicid) = BAD_APICID; 118#ifndef CONFIG_DEBUG_BOOT_PARAMS
23EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid); 119struct boot_params __initdata boot_params;
120#else
121struct boot_params boot_params;
122#endif
24 123
25/* Bitmask of physically existing CPUs */ 124/*
26physid_mask_t phys_cpu_present_map; 125 * Machine setup..
126 */
127static struct resource data_resource = {
128 .name = "Kernel data",
129 .start = 0,
130 .end = 0,
131 .flags = IORESOURCE_BUSY | IORESOURCE_MEM
132};
133
134static struct resource code_resource = {
135 .name = "Kernel code",
136 .start = 0,
137 .end = 0,
138 .flags = IORESOURCE_BUSY | IORESOURCE_MEM
139};
140
141static struct resource bss_resource = {
142 .name = "Kernel bss",
143 .start = 0,
144 .end = 0,
145 .flags = IORESOURCE_BUSY | IORESOURCE_MEM
146};
147
148
149#ifdef CONFIG_X86_32
150/* This value is set up by the early boot code to point to the value
151 immediately after the boot time page tables. It contains a *physical*
152 address, and must not be in the .bss segment! */
153unsigned long init_pg_tables_start __initdata = ~0UL;
154unsigned long init_pg_tables_end __initdata = ~0UL;
155
156static struct resource video_ram_resource = {
157 .name = "Video RAM area",
158 .start = 0xa0000,
159 .end = 0xbffff,
160 .flags = IORESOURCE_BUSY | IORESOURCE_MEM
161};
162
163/* cpu data as detected by the assembly code in head.S */
164struct cpuinfo_x86 new_cpu_data __cpuinitdata = {0, 0, 0, 0, -1, 1, 0, 0, -1};
165/* common cpu data for all cpus */
166struct cpuinfo_x86 boot_cpu_data __read_mostly = {0, 0, 0, 0, -1, 1, 0, 0, -1};
167EXPORT_SYMBOL(boot_cpu_data);
168static void set_mca_bus(int x)
169{
170#ifdef CONFIG_MCA
171 MCA_bus = x;
172#endif
173}
174
175unsigned int def_to_bigsmp;
176
177/* for MCA, but anyone else can use it if they want */
178unsigned int machine_id;
179unsigned int machine_submodel_id;
180unsigned int BIOS_revision;
181
182struct apm_info apm_info;
183EXPORT_SYMBOL(apm_info);
184
185#if defined(CONFIG_X86_SPEEDSTEP_SMI) || \
186 defined(CONFIG_X86_SPEEDSTEP_SMI_MODULE)
187struct ist_info ist_info;
188EXPORT_SYMBOL(ist_info);
189#else
190struct ist_info ist_info;
27#endif 191#endif
28 192
29#if defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) && defined(CONFIG_X86_SMP) 193#else
194struct cpuinfo_x86 boot_cpu_data __read_mostly;
195EXPORT_SYMBOL(boot_cpu_data);
196#endif
197
198
199#if !defined(CONFIG_X86_PAE) || defined(CONFIG_X86_64)
200unsigned long mmu_cr4_features;
201#else
202unsigned long mmu_cr4_features = X86_CR4_PAE;
203#endif
204
205/* Boot loader ID as an integer, for the benefit of proc_dointvec */
206int bootloader_type;
207
30/* 208/*
31 * Copy data used in early init routines from the initial arrays to the 209 * Early DMI memory
32 * per cpu data areas. These arrays then become expendable and the 210 */
33 * *_early_ptr's are zeroed indicating that the static arrays are gone. 211int dmi_alloc_index;
212char dmi_alloc_data[DMI_MAX_DATA];
213
214/*
215 * Setup options
216 */
217struct screen_info screen_info;
218EXPORT_SYMBOL(screen_info);
219struct edid_info edid_info;
220EXPORT_SYMBOL_GPL(edid_info);
221
222extern int root_mountflags;
223
224unsigned long saved_video_mode;
225
226#define RAMDISK_IMAGE_START_MASK 0x07FF
227#define RAMDISK_PROMPT_FLAG 0x8000
228#define RAMDISK_LOAD_FLAG 0x4000
229
230static char __initdata command_line[COMMAND_LINE_SIZE];
231
232#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
233struct edd edd;
234#ifdef CONFIG_EDD_MODULE
235EXPORT_SYMBOL(edd);
236#endif
237/**
238 * copy_edd() - Copy the BIOS EDD information
239 * from boot_params into a safe place.
240 *
34 */ 241 */
35static void __init setup_per_cpu_maps(void) 242static inline void copy_edd(void)
243{
244 memcpy(edd.mbr_signature, boot_params.edd_mbr_sig_buffer,
245 sizeof(edd.mbr_signature));
246 memcpy(edd.edd_info, boot_params.eddbuf, sizeof(edd.edd_info));
247 edd.mbr_signature_nr = boot_params.edd_mbr_sig_buf_entries;
248 edd.edd_info_nr = boot_params.eddbuf_entries;
249}
250#else
251static inline void copy_edd(void)
252{
253}
254#endif
255
256#ifdef CONFIG_BLK_DEV_INITRD
257
258#ifdef CONFIG_X86_32
259
260#define MAX_MAP_CHUNK (NR_FIX_BTMAPS << PAGE_SHIFT)
261static void __init relocate_initrd(void)
36{ 262{
37 int cpu;
38 263
39 for_each_possible_cpu(cpu) { 264 u64 ramdisk_image = boot_params.hdr.ramdisk_image;
40 per_cpu(x86_cpu_to_apicid, cpu) = x86_cpu_to_apicid_init[cpu]; 265 u64 ramdisk_size = boot_params.hdr.ramdisk_size;
41 per_cpu(x86_bios_cpu_apicid, cpu) = 266 u64 end_of_lowmem = max_low_pfn << PAGE_SHIFT;
42 x86_bios_cpu_apicid_init[cpu]; 267 u64 ramdisk_here;
43#ifdef CONFIG_NUMA 268 unsigned long slop, clen, mapaddr;
44 per_cpu(x86_cpu_to_node_map, cpu) = 269 char *p, *q;
45 x86_cpu_to_node_map_init[cpu]; 270
271 /* We need to move the initrd down into lowmem */
272 ramdisk_here = find_e820_area(0, end_of_lowmem, ramdisk_size,
273 PAGE_SIZE);
274
275 if (ramdisk_here == -1ULL)
276 panic("Cannot find place for new RAMDISK of size %lld\n",
277 ramdisk_size);
278
279 /* Note: this includes all the lowmem currently occupied by
280 the initrd; we rely on that fact to keep the data intact. */
281 reserve_early(ramdisk_here, ramdisk_here + ramdisk_size,
282 "NEW RAMDISK");
283 initrd_start = ramdisk_here + PAGE_OFFSET;
284 initrd_end = initrd_start + ramdisk_size;
285 printk(KERN_INFO "Allocated new RAMDISK: %08llx - %08llx\n",
286 ramdisk_here, ramdisk_here + ramdisk_size);
287
288 q = (char *)initrd_start;
289
290 /* Copy any lowmem portion of the initrd */
291 if (ramdisk_image < end_of_lowmem) {
292 clen = end_of_lowmem - ramdisk_image;
293 p = (char *)__va(ramdisk_image);
294 memcpy(q, p, clen);
295 q += clen;
296 ramdisk_image += clen;
297 ramdisk_size -= clen;
298 }
299
300 /* Copy the highmem portion of the initrd */
301 while (ramdisk_size) {
302 slop = ramdisk_image & ~PAGE_MASK;
303 clen = ramdisk_size;
304 if (clen > MAX_MAP_CHUNK-slop)
305 clen = MAX_MAP_CHUNK-slop;
306 mapaddr = ramdisk_image & PAGE_MASK;
307 p = early_ioremap(mapaddr, clen+slop);
308 memcpy(q, p+slop, clen);
309 early_iounmap(p, clen+slop);
310 q += clen;
311 ramdisk_image += clen;
312 ramdisk_size -= clen;
313 }
314 /* high pages are not converted by early_res_to_bootmem */
315 ramdisk_image = boot_params.hdr.ramdisk_image;
316 ramdisk_size = boot_params.hdr.ramdisk_size;
317 printk(KERN_INFO "Move RAMDISK from %016llx - %016llx to"
318 " %08llx - %08llx\n",
319 ramdisk_image, ramdisk_image + ramdisk_size - 1,
320 ramdisk_here, ramdisk_here + ramdisk_size - 1);
321}
46#endif 322#endif
323
324static void __init reserve_initrd(void)
325{
326 u64 ramdisk_image = boot_params.hdr.ramdisk_image;
327 u64 ramdisk_size = boot_params.hdr.ramdisk_size;
328 u64 ramdisk_end = ramdisk_image + ramdisk_size;
329 u64 end_of_lowmem = max_low_pfn << PAGE_SHIFT;
330
331 if (!boot_params.hdr.type_of_loader ||
332 !ramdisk_image || !ramdisk_size)
333 return; /* No initrd provided by bootloader */
334
335 initrd_start = 0;
336
337 if (ramdisk_size >= (end_of_lowmem>>1)) {
338 free_early(ramdisk_image, ramdisk_end);
339 printk(KERN_ERR "initrd too large to handle, "
340 "disabling initrd\n");
341 return;
342 }
343
344 printk(KERN_INFO "RAMDISK: %08llx - %08llx\n", ramdisk_image,
345 ramdisk_end);
346
347
348 if (ramdisk_end <= end_of_lowmem) {
349 /* All in lowmem, easy case */
350 /*
351 * don't need to reserve again, already reserved early
352 * in i386_start_kernel
353 */
354 initrd_start = ramdisk_image + PAGE_OFFSET;
355 initrd_end = initrd_start + ramdisk_size;
356 return;
47 } 357 }
48 358
49 /* indicate the early static arrays will soon be gone */ 359#ifdef CONFIG_X86_32
50 x86_cpu_to_apicid_early_ptr = NULL; 360 relocate_initrd();
51 x86_bios_cpu_apicid_early_ptr = NULL; 361#else
52#ifdef CONFIG_NUMA 362 printk(KERN_ERR "initrd extends beyond end of memory "
53 x86_cpu_to_node_map_early_ptr = NULL; 363 "(0x%08llx > 0x%08llx)\ndisabling initrd\n",
364 ramdisk_end, end_of_lowmem);
365 initrd_start = 0;
54#endif 366#endif
367 free_early(ramdisk_image, ramdisk_end);
368}
369#else
370static void __init reserve_initrd(void)
371{
372}
373#endif /* CONFIG_BLK_DEV_INITRD */
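
relocate_initrd()'s highmem loop is worth isolating: only MAX_MAP_CHUNK bytes can be mapped through early_ioremap() at a time, and "slop" absorbs a source address that is not page aligned. A self-contained sketch of the same loop, with map_window()/unmap_window() as hypothetical stand-ins and an identity mapping for the demo:

#include <string.h>
#include <stdio.h>

#define PAGE_MASK   (~4095UL)
#define MAX_CHUNK   (16 * 4096UL)

static char *map_window(unsigned long phys, unsigned long len)
{
        return (char *)phys;            /* identity map for the demo */
}
static void unmap_window(char *p, unsigned long len) { }

static void copy_chunked(char *dst, unsigned long src, unsigned long size)
{
        while (size) {
                unsigned long slop = src & ~PAGE_MASK;
                unsigned long clen = size;
                char *p;

                if (clen > MAX_CHUNK - slop)
                        clen = MAX_CHUNK - slop;
                p = map_window(src & PAGE_MASK, clen + slop);
                memcpy(dst, p + slop, clen);
                unmap_window(p, clen + slop);
                dst  += clen;
                src  += clen;
                size -= clen;
        }
}

int main(void)
{
        char src[8192] = "hello", dst[8192];
        copy_chunked(dst, (unsigned long)src, sizeof(src));
        puts(dst);
        return 0;
}
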
374
375static void __init parse_setup_data(void)
376{
377 struct setup_data *data;
378 u64 pa_data;
379
380 if (boot_params.hdr.version < 0x0209)
381 return;
382 pa_data = boot_params.hdr.setup_data;
383 while (pa_data) {
384 data = early_ioremap(pa_data, PAGE_SIZE);
385 switch (data->type) {
386 case SETUP_E820_EXT:
387 parse_e820_ext(data, pa_data);
388 break;
389 default:
390 break;
391 }
392 pa_data = data->next;
393 early_iounmap(data, PAGE_SIZE);
394 }
55} 395}
56 396
57#ifdef CONFIG_HAVE_CPUMASK_OF_CPU_MAP 397static void __init e820_reserve_setup_data(void)
58cpumask_t *cpumask_of_cpu_map __read_mostly; 398{
59EXPORT_SYMBOL(cpumask_of_cpu_map); 399 struct setup_data *data;
400 u64 pa_data;
401 int found = 0;
402
403 if (boot_params.hdr.version < 0x0209)
404 return;
405 pa_data = boot_params.hdr.setup_data;
406 while (pa_data) {
407 data = early_ioremap(pa_data, sizeof(*data));
408 e820_update_range(pa_data, sizeof(*data)+data->len,
409 E820_RAM, E820_RESERVED_KERN);
410 found = 1;
411 pa_data = data->next;
412 early_iounmap(data, sizeof(*data));
413 }
414 if (!found)
415 return;
416
417 sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
418 memcpy(&e820_saved, &e820, sizeof(struct e820map));
419 printk(KERN_INFO "extended physical RAM map:\n");
420 e820_print_map("reserve setup_data");
421}
60 422
61/* requires nr_cpu_ids to be initialized */ 423static void __init reserve_early_setup_data(void)
62static void __init setup_cpumask_of_cpu(void)
63{ 424{
64 int i; 425 struct setup_data *data;
426 u64 pa_data;
427 char buf[32];
428
429 if (boot_params.hdr.version < 0x0209)
430 return;
431 pa_data = boot_params.hdr.setup_data;
432 while (pa_data) {
433 data = early_ioremap(pa_data, sizeof(*data));
434 sprintf(buf, "setup data %x", data->type);
435 reserve_early(pa_data, pa_data+sizeof(*data)+data->len, buf);
436 pa_data = data->next;
437 early_iounmap(data, sizeof(*data));
438 }
439}
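
All three walkers above traverse the same structure: a singly linked list of setup_data nodes chained by physical address (boot protocol 2.09+), which is why each node must go through early_ioremap() before it can be touched. Its layout, roughly as declared in include/asm-x86/bootparam.h (reproduced as a sketch, with user-space stand-ins for the kernel integer types):

#include <stdint.h>

typedef uint64_t __u64;
typedef uint32_t __u32;
typedef uint8_t  __u8;

struct setup_data {
        __u64 next;     /* physical address of the next node; 0 ends the list */
        __u32 type;     /* e.g. SETUP_E820_EXT */
        __u32 len;      /* length of data[] in bytes */
        __u8  data[0];  /* payload follows immediately */
} __attribute__((packed));
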
440
441/*
442 * --------- Crashkernel reservation ------------------------------
443 */
444
445#ifdef CONFIG_KEXEC
446
447/**
448 * Reserve @size bytes of crashkernel memory at any suitable offset.
449 *
450 * @size: Size of the crashkernel memory to reserve.
451 * Returns the base address on success, and -1ULL on failure.
452 */
453unsigned long long find_and_reserve_crashkernel(unsigned long long size)
454{
455 const unsigned long long alignment = 16<<20; /* 16M */
456 unsigned long long start = 0LL;
457
458 while (1) {
459 int ret;
460
461 start = find_e820_area(start, ULONG_MAX, size, alignment);
462 if (start == -1ULL)
463 return start;
464
465 /* try to reserve it */
466 ret = reserve_bootmem_generic(start, size, BOOTMEM_EXCLUSIVE);
467 if (ret >= 0)
468 return start;
469
470 start += alignment;
471 }
472}
473
474static inline unsigned long long get_total_mem(void)
475{
476 unsigned long long total;
477
478 total = max_low_pfn - min_low_pfn;
479#ifdef CONFIG_HIGHMEM
480 total += highend_pfn - highstart_pfn;
481#endif
482
483 return total << PAGE_SHIFT;
484}
485
486static void __init reserve_crashkernel(void)
487{
488 unsigned long long total_mem;
489 unsigned long long crash_size, crash_base;
490 int ret;
491
492 total_mem = get_total_mem();
493
494 ret = parse_crashkernel(boot_command_line, total_mem,
495 &crash_size, &crash_base);
496 if (ret != 0 || crash_size <= 0)
497 return;
498
499 /* 0 means: find the address automatically */
500 if (crash_base <= 0) {
501 crash_base = find_and_reserve_crashkernel(crash_size);
502 if (crash_base == -1ULL) {
503 pr_info("crashkernel reservation failed. "
504 "No suitable area found.\n");
505 return;
506 }
507 } else {
508 ret = reserve_bootmem_generic(crash_base, crash_size,
509 BOOTMEM_EXCLUSIVE);
510 if (ret < 0) {
511 pr_info("crashkernel reservation failed - "
512 "memory is in use\n");
513 return;
514 }
515 }
516
517 printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
518 "for crashkernel (System RAM: %ldMB)\n",
519 (unsigned long)(crash_size >> 20),
520 (unsigned long)(crash_base >> 20),
521 (unsigned long)(total_mem >> 20));
65 522
66 /* alloc_bootmem zeroes memory */ 523 crashk_res.start = crash_base;
67 cpumask_of_cpu_map = alloc_bootmem_low(sizeof(cpumask_t) * nr_cpu_ids); 524 crashk_res.end = crash_base + crash_size - 1;
68 for (i = 0; i < nr_cpu_ids; i++) 525 insert_resource(&iomem_resource, &crashk_res);
69 cpu_set(i, cpumask_of_cpu_map[i]);
70} 526}
71#else 527#else
72static inline void setup_cpumask_of_cpu(void) { } 528static void __init reserve_crashkernel(void)
529{
530}
73#endif 531#endif
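
find_and_reserve_crashkernel()'s retry loop deserves a note: find_e820_area() only locates a hole, and the bootmem reservation can still fail, so the search steps forward by the 16 MB alignment and tries again. The control flow in isolation, with find_hole()/try_reserve() as hypothetical stand-ins:

#include <stdio.h>

#define ALIGN_16M   (16ULL << 20)

static unsigned long long find_hole(unsigned long long start,
                                    unsigned long long size)
{
        return start < (1ULL << 32) ? start : -1ULL;    /* demo only */
}
static int try_reserve(unsigned long long start, unsigned long long size)
{
        return start >= ALIGN_16M ? 0 : -1;             /* demo only */
}

static unsigned long long find_and_reserve(unsigned long long size)
{
        unsigned long long start = 0;

        for (;;) {
                start = find_hole(start, size);
                if (start == -1ULL)
                        return start;           /* nothing left to try */
                if (try_reserve(start, size) >= 0)
                        return start;           /* reservation stuck */
                start += ALIGN_16M;             /* step forward, retry */
        }
}

int main(void)
{
        printf("reserved at %#llx\n", find_and_reserve(64ULL << 20));
        return 0;
}
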
74 532
75#ifdef CONFIG_X86_32 533static struct resource standard_io_resources[] = {
76/* 534 { .name = "dma1", .start = 0x00, .end = 0x1f,
77 * Great future not-so-futuristic plan: make i386 and x86_64 do it 535 .flags = IORESOURCE_BUSY | IORESOURCE_IO },
78 * the same way 536 { .name = "pic1", .start = 0x20, .end = 0x21,
537 .flags = IORESOURCE_BUSY | IORESOURCE_IO },
538 { .name = "timer0", .start = 0x40, .end = 0x43,
539 .flags = IORESOURCE_BUSY | IORESOURCE_IO },
540 { .name = "timer1", .start = 0x50, .end = 0x53,
541 .flags = IORESOURCE_BUSY | IORESOURCE_IO },
542 { .name = "keyboard", .start = 0x60, .end = 0x60,
543 .flags = IORESOURCE_BUSY | IORESOURCE_IO },
544 { .name = "keyboard", .start = 0x64, .end = 0x64,
545 .flags = IORESOURCE_BUSY | IORESOURCE_IO },
546 { .name = "dma page reg", .start = 0x80, .end = 0x8f,
547 .flags = IORESOURCE_BUSY | IORESOURCE_IO },
548 { .name = "pic2", .start = 0xa0, .end = 0xa1,
549 .flags = IORESOURCE_BUSY | IORESOURCE_IO },
550 { .name = "dma2", .start = 0xc0, .end = 0xdf,
551 .flags = IORESOURCE_BUSY | IORESOURCE_IO },
552 { .name = "fpu", .start = 0xf0, .end = 0xff,
553 .flags = IORESOURCE_BUSY | IORESOURCE_IO }
554};
555
556static void __init reserve_standard_io_resources(void)
557{
558 int i;
559
560 /* request I/O space for devices used on all i[345]86 PCs */
561 for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++)
562 request_resource(&ioport_resource, &standard_io_resources[i]);
563
564}
565
566#ifdef CONFIG_PROC_VMCORE
567/* elfcorehdr= specifies the location of elf core header
568 * stored by the crashed kernel. This option will be passed
569 * by kexec loader to the capture kernel.
79 */ 570 */
80unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; 571static int __init setup_elfcorehdr(char *arg)
81EXPORT_SYMBOL(__per_cpu_offset); 572{
573 char *end;
574 if (!arg)
575 return -EINVAL;
576 elfcorehdr_addr = memparse(arg, &end);
577 return end > arg ? 0 : -EINVAL;
578}
579early_param("elfcorehdr", setup_elfcorehdr);
82#endif 580#endif
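
setup_elfcorehdr() accepts values like elfcorehdr=16M because memparse() scales a number by an optional K/M/G suffix and hands back the end pointer, which the "end > arg" test uses to reject empty input. A user-space re-implementation of that behavior (a sketch mirroring lib/cmdline.c, not the kernel function itself):

#include <stdio.h>
#include <stdlib.h>

static unsigned long long memparse_demo(const char *ptr, char **retptr)
{
        unsigned long long ret = strtoull(ptr, retptr, 0);

        switch (**retptr) {
        case 'G': case 'g': ret <<= 10; /* fall through */
        case 'M': case 'm': ret <<= 10; /* fall through */
        case 'K': case 'k': ret <<= 10; (*retptr)++;
        }
        return ret;
}

int main(void)
{
        char *end;
        printf("%llu\n", memparse_demo("16M", &end));   /* 16777216 */
        return 0;
}
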
83 581
84/* 582/*
85 * Great future plan: 583 * Determine if we were loaded by an EFI loader. If so, then we have also been
86 * Declare PDA itself and support (irqstack,tss,pgd) as per cpu data. 584 * passed the efi memmap, systab, etc., so we should use these data structures
87 * Always point %gs to its beginning 585 * for initialization. Note, the efi init code path is determined by the
586 * global efi_enabled. This allows the same kernel image to be used on existing
587 * systems (with a traditional BIOS) as well as on EFI systems.
588 */
589/*
590 * setup_arch - architecture-specific boot-time initializations
591 *
592 * Note: On x86_64, fixmaps are ready for use even before this is called.
88 */ 593 */
89void __init setup_per_cpu_areas(void) 594
595void __init setup_arch(char **cmdline_p)
90{ 596{
91 int i, highest_cpu = 0; 597#ifdef CONFIG_X86_32
92 unsigned long size; 598 memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
599 visws_early_detect();
600 pre_setup_arch_hook();
601 early_cpu_init();
602#else
603 printk(KERN_INFO "Command line: %s\n", boot_command_line);
604#endif
93 605
94#ifdef CONFIG_HOTPLUG_CPU 606 early_ioremap_init();
95 prefill_possible_map(); 607
608 ROOT_DEV = old_decode_dev(boot_params.hdr.root_dev);
609 screen_info = boot_params.screen_info;
610 edid_info = boot_params.edid_info;
611#ifdef CONFIG_X86_32
612 apm_info.bios = boot_params.apm_bios_info;
613 ist_info = boot_params.ist_info;
614 if (boot_params.sys_desc_table.length != 0) {
615 set_mca_bus(boot_params.sys_desc_table.table[3] & 0x2);
616 machine_id = boot_params.sys_desc_table.table[0];
617 machine_submodel_id = boot_params.sys_desc_table.table[1];
618 BIOS_revision = boot_params.sys_desc_table.table[2];
619 }
620#endif
621 saved_video_mode = boot_params.hdr.vid_mode;
622 bootloader_type = boot_params.hdr.type_of_loader;
623
624#ifdef CONFIG_BLK_DEV_RAM
625 rd_image_start = boot_params.hdr.ram_size & RAMDISK_IMAGE_START_MASK;
626 rd_prompt = ((boot_params.hdr.ram_size & RAMDISK_PROMPT_FLAG) != 0);
627 rd_doload = ((boot_params.hdr.ram_size & RAMDISK_LOAD_FLAG) != 0);
628#endif
629#ifdef CONFIG_EFI
630 if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature,
631#ifdef CONFIG_X86_32
632 "EL32",
633#else
634 "EL64",
96#endif 635#endif
636 4)) {
637 efi_enabled = 1;
638 efi_reserve_early();
639 }
640#endif
641
642 ARCH_SETUP
643
644 setup_memory_map();
645 parse_setup_data();
646 /* update the e820_saved too */
647 e820_reserve_setup_data();
97 648
98 /* Copy section for each CPU (we discard the original) */ 649 copy_edd();
99 size = PERCPU_ENOUGH_ROOM;
100 printk(KERN_INFO "PERCPU: Allocating %lu bytes of per cpu data\n",
101 size);
102 650
103 for_each_possible_cpu(i) { 651 if (!boot_params.hdr.root_flags)
104 char *ptr; 652 root_mountflags &= ~MS_RDONLY;
105#ifndef CONFIG_NEED_MULTIPLE_NODES 653 init_mm.start_code = (unsigned long) _text;
106 ptr = alloc_bootmem_pages(size); 654 init_mm.end_code = (unsigned long) _etext;
655 init_mm.end_data = (unsigned long) _edata;
656#ifdef CONFIG_X86_32
657 init_mm.brk = init_pg_tables_end + PAGE_OFFSET;
107#else 658#else
108 int node = early_cpu_to_node(i); 659 init_mm.brk = (unsigned long) &_end;
109 if (!node_online(node) || !NODE_DATA(node)) {
110 ptr = alloc_bootmem_pages(size);
111 printk(KERN_INFO
112 "cpu %d has no node or node-local memory\n", i);
113 }
114 else
115 ptr = alloc_bootmem_pages_node(NODE_DATA(node), size);
116#endif 660#endif
117 if (!ptr) 661
118 panic("Cannot allocate cpu data for CPU %d\n", i); 662 code_resource.start = virt_to_phys(_text);
663 code_resource.end = virt_to_phys(_etext)-1;
664 data_resource.start = virt_to_phys(_etext);
665 data_resource.end = virt_to_phys(_edata)-1;
666 bss_resource.start = virt_to_phys(&__bss_start);
667 bss_resource.end = virt_to_phys(&__bss_stop)-1;
668
119#ifdef CONFIG_X86_64 669#ifdef CONFIG_X86_64
120 cpu_pda(i)->data_offset = ptr - __per_cpu_start; 670 early_cpu_init();
671#endif
672 strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
673 *cmdline_p = command_line;
674
675 parse_early_param();
676
677 /* after early_param, so a panic can already reach the serial console */
678 reserve_early_setup_data();
679
680 if (acpi_mps_check()) {
681#ifdef CONFIG_X86_LOCAL_APIC
682 disable_apic = 1;
683#endif
684 clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
685 }
686
687 finish_e820_parsing();
688
689#ifdef CONFIG_X86_32
690 probe_roms();
691#endif
692
693 /* after parse_early_param, so the resource setup can be debugged */
694 insert_resource(&iomem_resource, &code_resource);
695 insert_resource(&iomem_resource, &data_resource);
696 insert_resource(&iomem_resource, &bss_resource);
697
698 if (efi_enabled)
699 efi_init();
700
701#ifdef CONFIG_X86_32
702 if (ppro_with_ram_bug()) {
703 e820_update_range(0x70000000ULL, 0x40000ULL, E820_RAM,
704 E820_RESERVED);
705 sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
706 printk(KERN_INFO "fixed physical RAM map:\n");
707 e820_print_map("bad_ppro");
708 }
709#else
710 early_gart_iommu_check();
711#endif
712
713 /*
714 * partially used pages are not usable - thus
715 * we are rounding upwards:
716 */
717 max_pfn = e820_end_of_ram_pfn();
718
719 /* preallocate 4k for mptable mpc */
720 early_reserve_e820_mpc_new();
721 /* update e820 for memory not covered by WB MTRRs */
722 mtrr_bp_init();
723 if (mtrr_trim_uncached_memory(max_pfn))
724 max_pfn = e820_end_of_ram_pfn();
725
726#ifdef CONFIG_X86_32
727 /* max_low_pfn gets updated here */
728 find_low_pfn_range();
121#else 729#else
122 __per_cpu_offset[i] = ptr - __per_cpu_start; 730 num_physpages = max_pfn;
731
732 check_efer();
733
734 /* How many end-of-memory variables you have, grandma! */
735 /* need this before calling reserve_initrd */
736 if (max_pfn > (1UL<<(32 - PAGE_SHIFT)))
737 max_low_pfn = e820_end_of_low_ram_pfn();
738 else
739 max_low_pfn = max_pfn;
740
741 high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1;
123#endif 742#endif
124 memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
125 743
126 highest_cpu = i; 744 /* max_pfn_mapped is updated here */
745 max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT);
746 max_pfn_mapped = max_low_pfn_mapped;
747
748#ifdef CONFIG_X86_64
749 if (max_pfn > max_low_pfn) {
750 max_pfn_mapped = init_memory_mapping(1UL<<32,
751 max_pfn<<PAGE_SHIFT);
752 /* can we preserve max_low_pfn? */
753 max_low_pfn = max_pfn;
127 } 754 }
755#endif
756
757 /*
758 * NOTE: On x86-32, fixmaps are ready for use only from this point on.
759 */
128 760
129 nr_cpu_ids = highest_cpu + 1; 761#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
130 printk(KERN_DEBUG "NR_CPUS: %d, nr_cpu_ids: %d\n", NR_CPUS, nr_cpu_ids); 762 if (init_ohci1394_dma_early)
763 init_ohci1394_dma_on_all_controllers();
764#endif
131 765
132 /* Setup percpu data maps */ 766 reserve_initrd();
133 setup_per_cpu_maps();
134 767
135 /* Setup cpumask_of_cpu map */ 768#ifdef CONFIG_X86_64
136 setup_cpumask_of_cpu(); 769 vsmp_init();
137} 770#endif
771
772 dmi_scan_machine();
773
774 io_delay_init();
138 775
776 /*
777 * Parse the ACPI tables for possible boot-time SMP configuration.
778 */
779 acpi_boot_table_init();
780
781#ifdef CONFIG_ACPI_NUMA
782 /*
783 * Parse SRAT to discover nodes.
784 */
785 acpi_numa_init();
786#endif
787
788 initmem_init(0, max_pfn);
789
790#ifdef CONFIG_X86_64
791 dma32_reserve_bootmem();
792#endif
793
794#ifdef CONFIG_ACPI_SLEEP
795 /*
796 * Reserve low memory region for sleep support.
797 */
798 acpi_reserve_bootmem();
799#endif
800#ifdef CONFIG_X86_FIND_SMP_CONFIG
801 /*
802 * Find and reserve possible boot-time SMP configuration:
803 */
804 find_smp_config();
139#endif 805#endif
806 reserve_crashkernel();
807
808 reserve_ibft_region();
809
810#ifdef CONFIG_KVM_CLOCK
811 kvmclock_init();
812#endif
813
814#if defined(CONFIG_VMI) && defined(CONFIG_X86_32)
815 /*
816 * Must be after max_low_pfn is determined, and before kernel
817 * pagetables are set up.
818 */
819 vmi_init();
820#endif
821
822 paging_init();
823
824#ifdef CONFIG_X86_64
825 map_vsyscall();
826#endif
827
828#ifdef CONFIG_X86_GENERICARCH
829 generic_apic_probe();
830#endif
831
832 early_quirks();
833
834 /*
835 * Read APIC and some other early information from ACPI tables.
836 */
837 acpi_boot_init();
838
839#if defined(CONFIG_X86_MPPARSE) || defined(CONFIG_X86_VISWS)
840 /*
841 * get boot-time SMP configuration:
842 */
843 if (smp_found_config)
844 get_smp_config();
845#endif
846
847 prefill_possible_map();
848#ifdef CONFIG_X86_64
849 init_cpu_to_node();
850#endif
851
852 init_apic_mappings();
853 ioapic_init_mappings();
854
855#if defined(CONFIG_SMP) && defined(CONFIG_X86_PC) && defined(CONFIG_X86_32)
856 if (def_to_bigsmp)
857 printk(KERN_WARNING "More than 8 CPUs detected and "
858 "CONFIG_X86_PC cannot handle it.\nUse "
859 "CONFIG_X86_GENERICARCH or CONFIG_X86_BIGSMP.\n");
860#endif
861 kvm_guest_init();
862
863 e820_reserve_resources();
864 e820_mark_nosave_regions(max_low_pfn);
865
866#ifdef CONFIG_X86_32
867 request_resource(&iomem_resource, &video_ram_resource);
868#endif
869 reserve_standard_io_resources();
870
871 e820_setup_gap();
872
873#ifdef CONFIG_VT
874#if defined(CONFIG_VGA_CONSOLE)
875 if (!efi_enabled || (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY))
876 conswitchp = &vga_con;
877#elif defined(CONFIG_DUMMY_CONSOLE)
878 conswitchp = &dummy_con;
879#endif
880#endif
881}
diff --git a/arch/x86/kernel/setup64.c b/arch/x86/kernel/setup64.c
deleted file mode 100644
index aee0e8200777..000000000000
--- a/arch/x86/kernel/setup64.c
+++ /dev/null
@@ -1,287 +0,0 @@
1/*
2 * X86-64 specific CPU setup.
3 * Copyright (C) 1995 Linus Torvalds
4 * Copyright 2001, 2002, 2003 SuSE Labs / Andi Kleen.
5 * See setup.c for older changelog.
6 */
7#include <linux/init.h>
8#include <linux/kernel.h>
9#include <linux/sched.h>
10#include <linux/string.h>
11#include <linux/bootmem.h>
12#include <linux/bitops.h>
13#include <linux/module.h>
14#include <linux/kgdb.h>
15#include <asm/pda.h>
16#include <asm/pgtable.h>
17#include <asm/processor.h>
18#include <asm/desc.h>
19#include <asm/atomic.h>
20#include <asm/mmu_context.h>
21#include <asm/smp.h>
22#include <asm/i387.h>
23#include <asm/percpu.h>
24#include <asm/proto.h>
25#include <asm/sections.h>
26#include <asm/setup.h>
27#include <asm/genapic.h>
28
29#ifndef CONFIG_DEBUG_BOOT_PARAMS
30struct boot_params __initdata boot_params;
31#else
32struct boot_params boot_params;
33#endif
34
35cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;
36
37struct x8664_pda *_cpu_pda[NR_CPUS] __read_mostly;
38EXPORT_SYMBOL(_cpu_pda);
39struct x8664_pda boot_cpu_pda[NR_CPUS] __cacheline_aligned;
40
41struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
42
43char boot_cpu_stack[IRQSTACKSIZE] __attribute__((section(".bss.page_aligned")));
44
45unsigned long __supported_pte_mask __read_mostly = ~0UL;
46EXPORT_SYMBOL_GPL(__supported_pte_mask);
47
48static int do_not_nx __cpuinitdata = 0;
49
50/* noexec=on|off
51Control non-executable mappings for 64-bit processes.
52
53on Enable (default)
54off Disable
55*/
56static int __init nonx_setup(char *str)
57{
58 if (!str)
59 return -EINVAL;
60 if (!strncmp(str, "on", 2)) {
61 __supported_pte_mask |= _PAGE_NX;
62 do_not_nx = 0;
63 } else if (!strncmp(str, "off", 3)) {
64 do_not_nx = 1;
65 __supported_pte_mask &= ~_PAGE_NX;
66 }
67 return 0;
68}
69early_param("noexec", nonx_setup);
70
71int force_personality32 = 0;
72
73/* noexec32=on|off
74Control non-executable heap for 32-bit processes.
75To control the stack too, use noexec=off
76
77on PROT_READ does not imply PROT_EXEC for 32-bit processes (default)
78off PROT_READ implies PROT_EXEC
79*/
80static int __init nonx32_setup(char *str)
81{
82 if (!strcmp(str, "on"))
83 force_personality32 &= ~READ_IMPLIES_EXEC;
84 else if (!strcmp(str, "off"))
85 force_personality32 |= READ_IMPLIES_EXEC;
86 return 1;
87}
88__setup("noexec32=", nonx32_setup);
89
90void pda_init(int cpu)
91{
92 struct x8664_pda *pda = cpu_pda(cpu);
93
 94 /* Set up data that may be needed in __get_free_pages early */
95 asm volatile("movl %0,%%fs ; movl %0,%%gs" :: "r" (0));
 96 /* Memory clobbers used to order PDA accesses */
97 mb();
98 wrmsrl(MSR_GS_BASE, pda);
99 mb();
100
101 pda->cpunumber = cpu;
102 pda->irqcount = -1;
103 pda->kernelstack =
104 (unsigned long)stack_thread_info() - PDA_STACKOFFSET + THREAD_SIZE;
105 pda->active_mm = &init_mm;
106 pda->mmu_state = 0;
107
108 if (cpu == 0) {
109 /* others are initialized in smpboot.c */
110 pda->pcurrent = &init_task;
111 pda->irqstackptr = boot_cpu_stack;
112 } else {
113 pda->irqstackptr = (char *)
114 __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER);
115 if (!pda->irqstackptr)
116 panic("cannot allocate irqstack for cpu %d", cpu);
117 }
118
119
120 pda->irqstackptr += IRQSTACKSIZE-64;
121}
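/*
 * Note: pda->irqstackptr ends up 64 bytes below the top of the
 * allocation; the IRQ stack grows downwards from there, leaving a
 * small gap at the very top of the area.
 */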
122
123char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]
124__attribute__((section(".bss.page_aligned")));
125
126extern asmlinkage void ignore_sysret(void);
127
128/* May not be marked __init: used by software suspend */
129void syscall_init(void)
130{
131 /*
 132 * LSTAR and STAR live in a somewhat strange symbiosis:
 133 * they both write to the same internal register. STAR allows setting CS/DS,
 134 * but only a 32-bit target; LSTAR sets the 64-bit rip.
135 */
136 wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32);
137 wrmsrl(MSR_LSTAR, system_call);
138 wrmsrl(MSR_CSTAR, ignore_sysret);
139
140#ifdef CONFIG_IA32_EMULATION
 141 syscall32_cpu_init();
142#endif
143
144 /* Flags to clear on syscall */
145 wrmsrl(MSR_SYSCALL_MASK,
146 X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|X86_EFLAGS_IOPL);
147}
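/*
 * Layout of the STAR value written above: on SYSCALL the CPU loads CS
 * from STAR[47:32] (and SS from that value + 8); on SYSRET it derives
 * the user-mode selectors from STAR[63:48].  Hence:
 *
 *   ((u64)__USER32_CS) << 48   - selector base used by SYSRET
 *   ((u64)__KERNEL_CS) << 32   - selector base used by SYSCALL
 */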
148
149void __cpuinit check_efer(void)
150{
151 unsigned long efer;
152
153 rdmsrl(MSR_EFER, efer);
154 if (!(efer & EFER_NX) || do_not_nx) {
155 __supported_pte_mask &= ~_PAGE_NX;
156 }
157}
158
159unsigned long kernel_eflags;
160
161/*
162 * Copies of the original ist values from the tss are only accessed during
163 * debugging, no special alignment required.
164 */
165DEFINE_PER_CPU(struct orig_ist, orig_ist);
166
167/*
168 * cpu_init() initializes state that is per-CPU. Some data is already
169 * initialized (naturally) in the bootstrap process, such as the GDT
170 * and IDT. We reload them nevertheless, this function acts as a
171 * 'CPU state barrier', nothing should get across.
172 * A lot of state is already set up in PDA init.
173 */
174void __cpuinit cpu_init (void)
175{
176 int cpu = stack_smp_processor_id();
177 struct tss_struct *t = &per_cpu(init_tss, cpu);
178 struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu);
179 unsigned long v;
180 char *estacks = NULL;
181 struct task_struct *me;
182 int i;
183
184 /* CPU 0 is initialised in head64.c */
185 if (cpu != 0) {
186 pda_init(cpu);
187 } else
188 estacks = boot_exception_stacks;
189
190 me = current;
191
192 if (cpu_test_and_set(cpu, cpu_initialized))
193 panic("CPU#%d already initialized!\n", cpu);
194
195 printk("Initializing CPU#%d\n", cpu);
196
197 clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
198
199 /*
200 * Initialize the per-CPU GDT with the boot GDT,
201 * and set up the GDT descriptor:
202 */
203 if (cpu)
204 memcpy(get_cpu_gdt_table(cpu), cpu_gdt_table, GDT_SIZE);
205
206 cpu_gdt_descr[cpu].size = GDT_SIZE;
207 load_gdt((const struct desc_ptr *)&cpu_gdt_descr[cpu]);
208 load_idt((const struct desc_ptr *)&idt_descr);
209
210 memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8);
211 syscall_init();
212
213 wrmsrl(MSR_FS_BASE, 0);
214 wrmsrl(MSR_KERNEL_GS_BASE, 0);
215 barrier();
216
217 check_efer();
218
219 /*
220 * set up and load the per-CPU TSS
221 */
222 for (v = 0; v < N_EXCEPTION_STACKS; v++) {
223 static const unsigned int order[N_EXCEPTION_STACKS] = {
224 [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER,
225 [DEBUG_STACK - 1] = DEBUG_STACK_ORDER
226 };
227 if (cpu) {
228 estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]);
229 if (!estacks)
230 panic("Cannot allocate exception stack %ld %d\n",
231 v, cpu);
232 }
233 estacks += PAGE_SIZE << order[v];
234 orig_ist->ist[v] = t->x86_tss.ist[v] = (unsigned long)estacks;
235 }
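/*
 * At this point every IST slot points at the top of its own stack, so
 * critical exceptions (NMI, double fault, ...) always enter on a
 * known-good stack; only DEBUG_STACK gets the larger
 * DEBUG_STACK_ORDER allocation.
 */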
236
237 t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
238 /*
239 * <= is required because the CPU will access up to
240 * 8 bits beyond the end of the IO permission bitmap.
241 */
242 for (i = 0; i <= IO_BITMAP_LONGS; i++)
243 t->io_bitmap[i] = ~0UL;
244
245 atomic_inc(&init_mm.mm_count);
246 me->active_mm = &init_mm;
247 if (me->mm)
248 BUG();
249 enter_lazy_tlb(&init_mm, me);
250
251 set_tss_desc(cpu, t);
252 load_TR_desc();
253 load_LDT(&init_mm.context);
254
255#ifdef CONFIG_KGDB
256 /*
257 * If the kgdb is connected no debug regs should be altered. This
258 * is only applicable when KGDB and a KGDB I/O module are built
259 * into the kernel and you are using early debugging with
260 * kgdbwait. KGDB will control the kernel HW breakpoint registers.
261 */
262 if (kgdb_connected && arch_kgdb_ops.correct_hw_break)
263 arch_kgdb_ops.correct_hw_break();
264 else {
265#endif
266 /*
267 * Clear all 6 debug registers:
268 */
269
270 set_debugreg(0UL, 0);
271 set_debugreg(0UL, 1);
272 set_debugreg(0UL, 2);
273 set_debugreg(0UL, 3);
274 set_debugreg(0UL, 6);
275 set_debugreg(0UL, 7);
276#ifdef CONFIG_KGDB
277 /* If the kgdb is connected no debug regs should be altered. */
278 }
279#endif
280
281 fpu_init();
282
283 raw_local_save_flags(kernel_eflags);
284
285 if (is_uv_system())
286 uv_cpu_init();
287}
diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
deleted file mode 100644
index 5a2f8e063887..000000000000
--- a/arch/x86/kernel/setup_32.c
+++ /dev/null
@@ -1,964 +0,0 @@
1/*
2 * Copyright (C) 1995 Linus Torvalds
3 *
4 * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
5 *
6 * Memory region support
7 * David Parsons <orc@pell.chi.il.us>, July-August 1999
8 *
9 * Added E820 sanitization routine (removes overlapping memory regions);
10 * Brian Moyle <bmoyle@mvista.com>, February 2001
11 *
12 * Moved CPU detection code to cpu/${cpu}.c
13 * Patrick Mochel <mochel@osdl.org>, March 2002
14 *
15 * Provisions for empty E820 memory regions (reported by certain BIOSes).
16 * Alex Achenbach <xela@slit.de>, December 2002.
17 *
18 */
19
20/*
21 * This file handles the architecture-dependent parts of initialization
22 */
23
24#include <linux/sched.h>
25#include <linux/mm.h>
26#include <linux/mmzone.h>
27#include <linux/screen_info.h>
28#include <linux/ioport.h>
29#include <linux/acpi.h>
30#include <linux/apm_bios.h>
31#include <linux/initrd.h>
32#include <linux/bootmem.h>
33#include <linux/seq_file.h>
34#include <linux/console.h>
35#include <linux/mca.h>
36#include <linux/root_dev.h>
37#include <linux/highmem.h>
38#include <linux/module.h>
39#include <linux/efi.h>
40#include <linux/init.h>
41#include <linux/edd.h>
42#include <linux/iscsi_ibft.h>
43#include <linux/nodemask.h>
44#include <linux/kexec.h>
45#include <linux/crash_dump.h>
46#include <linux/dmi.h>
47#include <linux/pfn.h>
48#include <linux/pci.h>
49#include <linux/init_ohci1394_dma.h>
50#include <linux/kvm_para.h>
51
52#include <video/edid.h>
53
54#include <asm/mtrr.h>
55#include <asm/apic.h>
56#include <asm/e820.h>
57#include <asm/mpspec.h>
58#include <asm/mmzone.h>
59#include <asm/setup.h>
60#include <asm/arch_hooks.h>
61#include <asm/sections.h>
62#include <asm/io_apic.h>
63#include <asm/ist.h>
64#include <asm/io.h>
65#include <asm/vmi.h>
66#include <setup_arch.h>
67#include <asm/bios_ebda.h>
68#include <asm/cacheflush.h>
69#include <asm/processor.h>
70
71/* This value is set up by the early boot code to point to the value
72 immediately after the boot time page tables. It contains a *physical*
73 address, and must not be in the .bss segment! */
74unsigned long init_pg_tables_end __initdata = ~0UL;
75
76/*
77 * Machine setup..
78 */
79static struct resource data_resource = {
80 .name = "Kernel data",
81 .start = 0,
82 .end = 0,
83 .flags = IORESOURCE_BUSY | IORESOURCE_MEM
84};
85
86static struct resource code_resource = {
87 .name = "Kernel code",
88 .start = 0,
89 .end = 0,
90 .flags = IORESOURCE_BUSY | IORESOURCE_MEM
91};
92
93static struct resource bss_resource = {
94 .name = "Kernel bss",
95 .start = 0,
96 .end = 0,
97 .flags = IORESOURCE_BUSY | IORESOURCE_MEM
98};
99
100static struct resource video_ram_resource = {
101 .name = "Video RAM area",
102 .start = 0xa0000,
103 .end = 0xbffff,
104 .flags = IORESOURCE_BUSY | IORESOURCE_MEM
105};
106
107static struct resource standard_io_resources[] = { {
108 .name = "dma1",
109 .start = 0x0000,
110 .end = 0x001f,
111 .flags = IORESOURCE_BUSY | IORESOURCE_IO
112}, {
113 .name = "pic1",
114 .start = 0x0020,
115 .end = 0x0021,
116 .flags = IORESOURCE_BUSY | IORESOURCE_IO
117}, {
118 .name = "timer0",
119 .start = 0x0040,
120 .end = 0x0043,
121 .flags = IORESOURCE_BUSY | IORESOURCE_IO
122}, {
123 .name = "timer1",
124 .start = 0x0050,
125 .end = 0x0053,
126 .flags = IORESOURCE_BUSY | IORESOURCE_IO
127}, {
128 .name = "keyboard",
129 .start = 0x0060,
130 .end = 0x0060,
131 .flags = IORESOURCE_BUSY | IORESOURCE_IO
132}, {
133 .name = "keyboard",
134 .start = 0x0064,
135 .end = 0x0064,
136 .flags = IORESOURCE_BUSY | IORESOURCE_IO
137}, {
138 .name = "dma page reg",
139 .start = 0x0080,
140 .end = 0x008f,
141 .flags = IORESOURCE_BUSY | IORESOURCE_IO
142}, {
143 .name = "pic2",
144 .start = 0x00a0,
145 .end = 0x00a1,
146 .flags = IORESOURCE_BUSY | IORESOURCE_IO
147}, {
148 .name = "dma2",
149 .start = 0x00c0,
150 .end = 0x00df,
151 .flags = IORESOURCE_BUSY | IORESOURCE_IO
152}, {
153 .name = "fpu",
154 .start = 0x00f0,
155 .end = 0x00ff,
156 .flags = IORESOURCE_BUSY | IORESOURCE_IO
157} };
158
159/* cpu data as detected by the assembly code in head.S */
160struct cpuinfo_x86 new_cpu_data __cpuinitdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
161/* common cpu data for all cpus */
162struct cpuinfo_x86 boot_cpu_data __read_mostly = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
163EXPORT_SYMBOL(boot_cpu_data);
164
165unsigned int def_to_bigsmp;
166
167#ifndef CONFIG_X86_PAE
168unsigned long mmu_cr4_features;
169#else
170unsigned long mmu_cr4_features = X86_CR4_PAE;
171#endif
172
173/* for MCA, but anyone else can use it if they want */
174unsigned int machine_id;
175unsigned int machine_submodel_id;
176unsigned int BIOS_revision;
177
178/* Boot loader ID as an integer, for the benefit of proc_dointvec */
179int bootloader_type;
180
181/* user-defined highmem size */
182static unsigned int highmem_pages = -1;
183
184/*
185 * Setup options
186 */
187struct screen_info screen_info;
188EXPORT_SYMBOL(screen_info);
189struct apm_info apm_info;
190EXPORT_SYMBOL(apm_info);
191struct edid_info edid_info;
192EXPORT_SYMBOL_GPL(edid_info);
193struct ist_info ist_info;
194#if defined(CONFIG_X86_SPEEDSTEP_SMI) || \
195 defined(CONFIG_X86_SPEEDSTEP_SMI_MODULE)
196EXPORT_SYMBOL(ist_info);
197#endif
198
199extern void early_cpu_init(void);
200extern int root_mountflags;
201
202unsigned long saved_video_mode;
203
204#define RAMDISK_IMAGE_START_MASK 0x07FF
205#define RAMDISK_PROMPT_FLAG 0x8000
206#define RAMDISK_LOAD_FLAG 0x4000
207
208static char __initdata command_line[COMMAND_LINE_SIZE];
209
210#ifndef CONFIG_DEBUG_BOOT_PARAMS
211struct boot_params __initdata boot_params;
212#else
213struct boot_params boot_params;
214#endif
215
216#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
217struct edd edd;
218#ifdef CONFIG_EDD_MODULE
219EXPORT_SYMBOL(edd);
220#endif
221/**
222 * copy_edd() - Copy the BIOS EDD information
223 * from boot_params into a safe place.
224 *
225 */
226static inline void copy_edd(void)
227{
228 memcpy(edd.mbr_signature, boot_params.edd_mbr_sig_buffer,
229 sizeof(edd.mbr_signature));
230 memcpy(edd.edd_info, boot_params.eddbuf, sizeof(edd.edd_info));
231 edd.mbr_signature_nr = boot_params.edd_mbr_sig_buf_entries;
232 edd.edd_info_nr = boot_params.eddbuf_entries;
233}
234#else
235static inline void copy_edd(void)
236{
237}
238#endif
239
240int __initdata user_defined_memmap;
241
242/*
243 * "mem=nopentium" disables the 4MB page tables.
244 * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM
245 * to <mem>, overriding the bios size.
246 * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from
247 * <start> to <start>+<mem>, overriding the bios size.
248 *
 249 * HPA tells me bootloaders need to parse mem=, so no new
 250 * option should reuse mem= [also see Documentation/i386/boot.txt]
251 */
252static int __init parse_mem(char *arg)
253{
254 if (!arg)
255 return -EINVAL;
256
257 if (strcmp(arg, "nopentium") == 0) {
258 setup_clear_cpu_cap(X86_FEATURE_PSE);
259 } else {
260 /* If the user specifies memory size, we
261 * limit the BIOS-provided memory map to
262 * that size. exactmap can be used to specify
263 * the exact map. mem=number can be used to
264 * trim the existing memory map.
265 */
266 unsigned long long mem_size;
267
268 mem_size = memparse(arg, &arg);
269 limit_regions(mem_size);
270 user_defined_memmap = 1;
271 }
272 return 0;
273}
274early_param("mem", parse_mem);
275
276#ifdef CONFIG_PROC_VMCORE
277/* elfcorehdr= specifies the location of the ELF core header
278 * stored by the crashed kernel.
279 */
280static int __init parse_elfcorehdr(char *arg)
281{
282 if (!arg)
283 return -EINVAL;
284
285 elfcorehdr_addr = memparse(arg, &arg);
286 return 0;
287}
288early_param("elfcorehdr", parse_elfcorehdr);
289#endif /* CONFIG_PROC_VMCORE */
290
291/*
292 * highmem=size forces highmem to be exactly 'size' bytes.
293 * This works even on boxes that have no highmem otherwise.
294 * This also works to reduce highmem size on bigger boxes.
295 */
296static int __init parse_highmem(char *arg)
297{
298 if (!arg)
299 return -EINVAL;
300
301 highmem_pages = memparse(arg, &arg) >> PAGE_SHIFT;
302 return 0;
303}
304early_param("highmem", parse_highmem);
305
306/*
307 * vmalloc=size forces the vmalloc area to be exactly 'size'
308 * bytes. This can be used to increase (or decrease) the
309 * vmalloc area - the default is 128m.
310 */
311static int __init parse_vmalloc(char *arg)
312{
313 if (!arg)
314 return -EINVAL;
315
316 __VMALLOC_RESERVE = memparse(arg, &arg);
317 return 0;
318}
319early_param("vmalloc", parse_vmalloc);
320
321/*
322 * reservetop=size reserves a hole at the top of the kernel address space which
323 * a hypervisor can load into later. Needed for dynamically loaded hypervisors,
324 * so relocating the fixmap can be done before paging initialization.
325 */
326static int __init parse_reservetop(char *arg)
327{
328 unsigned long address;
329
330 if (!arg)
331 return -EINVAL;
332
333 address = memparse(arg, &arg);
334 reserve_top_address(address);
335 return 0;
336}
337early_param("reservetop", parse_reservetop);
338
339/*
340 * Determine low and high memory ranges:
341 */
342unsigned long __init find_max_low_pfn(void)
343{
344 unsigned long max_low_pfn;
345
346 max_low_pfn = max_pfn;
347 if (max_low_pfn > MAXMEM_PFN) {
348 if (highmem_pages == -1)
349 highmem_pages = max_pfn - MAXMEM_PFN;
350 if (highmem_pages + MAXMEM_PFN < max_pfn)
351 max_pfn = MAXMEM_PFN + highmem_pages;
352 if (highmem_pages + MAXMEM_PFN > max_pfn) {
 353 printk("only %luMB of highmem available, ignoring highmem size of %uMB.\n", pages_to_mb(max_pfn - MAXMEM_PFN), pages_to_mb(highmem_pages));
354 highmem_pages = 0;
355 }
356 max_low_pfn = MAXMEM_PFN;
357#ifndef CONFIG_HIGHMEM
358 /* Maximum memory usable is what is directly addressable */
 359 printk(KERN_WARNING "Warning: only %ldMB will be used.\n",
360 MAXMEM>>20);
361 if (max_pfn > MAX_NONPAE_PFN)
362 printk(KERN_WARNING "Use a HIGHMEM64G enabled kernel.\n");
363 else
364 printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n");
365 max_pfn = MAXMEM_PFN;
366#else /* !CONFIG_HIGHMEM */
367#ifndef CONFIG_HIGHMEM64G
368 if (max_pfn > MAX_NONPAE_PFN) {
369 max_pfn = MAX_NONPAE_PFN;
 370 printk(KERN_WARNING "Warning: only 4GB will be used.\n");
371 printk(KERN_WARNING "Use a HIGHMEM64G enabled kernel.\n");
372 }
373#endif /* !CONFIG_HIGHMEM64G */
374#endif /* !CONFIG_HIGHMEM */
375 } else {
376 if (highmem_pages == -1)
377 highmem_pages = 0;
378#ifdef CONFIG_HIGHMEM
379 if (highmem_pages >= max_pfn) {
 380 printk(KERN_ERR "highmem size specified (%uMB) is bigger than pages available (%luMB)!\n", pages_to_mb(highmem_pages), pages_to_mb(max_pfn));
381 highmem_pages = 0;
382 }
383 if (highmem_pages) {
384 if (max_low_pfn-highmem_pages < 64*1024*1024/PAGE_SIZE){
 385 printk(KERN_ERR "highmem size %uMB results in less than 64MB lowmem, ignoring it.\n", pages_to_mb(highmem_pages));
386 highmem_pages = 0;
387 }
388 max_low_pfn -= highmem_pages;
389 }
390#else
391 if (highmem_pages)
392 printk(KERN_ERR "ignoring highmem size on non-highmem kernel!\n");
393#endif
394 }
395 return max_low_pfn;
396}
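/*
 * MAXMEM_PFN above is the last page frame that fits in the direct
 * (lowmem) mapping - roughly 896MB with the default 3G/1G user/kernel
 * split; anything beyond it is reachable only as highmem.
 */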
397
398#define BIOS_LOWMEM_KILOBYTES 0x413
399
400/*
401 * The BIOS places the EBDA/XBDA at the top of conventional
402 * memory, and usually decreases the reported amount of
403 * conventional memory (int 0x12) too. This also contains a
404 * workaround for Dell systems that neglect to reserve EBDA.
405 * The same workaround also avoids a problem with the AMD768MPX
406 * chipset: reserve a page before VGA to prevent PCI prefetch
 407 * into it (errata #56). Usually the page is reserved anyway,
408 * unless you have no PS/2 mouse plugged in.
409 */
410static void __init reserve_ebda_region(void)
411{
412 unsigned int lowmem, ebda_addr;
413
414 /* To determine the position of the EBDA and the */
415 /* end of conventional memory, we need to look at */
416 /* the BIOS data area. In a paravirtual environment */
417 /* that area is absent. We'll just have to assume */
418 /* that the paravirt case can handle memory setup */
419 /* correctly, without our help. */
420 if (paravirt_enabled())
421 return;
422
423 /* end of low (conventional) memory */
424 lowmem = *(unsigned short *)__va(BIOS_LOWMEM_KILOBYTES);
425 lowmem <<= 10;
426
427 /* start of EBDA area */
428 ebda_addr = get_bios_ebda();
429
430 /* Fixup: bios puts an EBDA in the top 64K segment */
431 /* of conventional memory, but does not adjust lowmem. */
432 if ((lowmem - ebda_addr) <= 0x10000)
433 lowmem = ebda_addr;
434
435 /* Fixup: bios does not report an EBDA at all. */
436 /* Some old Dells seem to need 4k anyhow (bugzilla 2990) */
437 if ((ebda_addr == 0) && (lowmem >= 0x9f000))
438 lowmem = 0x9f000;
439
440 /* Paranoia: should never happen, but... */
441 if ((lowmem == 0) || (lowmem >= 0x100000))
442 lowmem = 0x9f000;
443
444 /* reserve all memory between lowmem and the 1MB mark */
445 reserve_bootmem(lowmem, 0x100000 - lowmem, BOOTMEM_DEFAULT);
446}
447
448#ifndef CONFIG_NEED_MULTIPLE_NODES
449static void __init setup_bootmem_allocator(void);
450static unsigned long __init setup_memory(void)
451{
452 /*
453 * partially used pages are not usable - thus
454 * we are rounding upwards:
455 */
456 min_low_pfn = PFN_UP(init_pg_tables_end);
457
458 max_low_pfn = find_max_low_pfn();
459
460#ifdef CONFIG_HIGHMEM
461 highstart_pfn = highend_pfn = max_pfn;
462 if (max_pfn > max_low_pfn) {
463 highstart_pfn = max_low_pfn;
464 }
465 printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
466 pages_to_mb(highend_pfn - highstart_pfn));
467 num_physpages = highend_pfn;
468 high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
469#else
470 num_physpages = max_low_pfn;
471 high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
472#endif
473#ifdef CONFIG_FLATMEM
474 max_mapnr = num_physpages;
475#endif
476 printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
477 pages_to_mb(max_low_pfn));
478
479 setup_bootmem_allocator();
480
481 return max_low_pfn;
482}
483
484static void __init zone_sizes_init(void)
485{
486 unsigned long max_zone_pfns[MAX_NR_ZONES];
487 memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
488 max_zone_pfns[ZONE_DMA] =
489 virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
490 max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
491#ifdef CONFIG_HIGHMEM
492 max_zone_pfns[ZONE_HIGHMEM] = highend_pfn;
493 add_active_range(0, 0, highend_pfn);
494#else
495 add_active_range(0, 0, max_low_pfn);
496#endif
497
498 free_area_init_nodes(max_zone_pfns);
499}
500#else
501extern unsigned long __init setup_memory(void);
502extern void zone_sizes_init(void);
503#endif /* !CONFIG_NEED_MULTIPLE_NODES */
504
505static inline unsigned long long get_total_mem(void)
506{
507 unsigned long long total;
508
509 total = max_low_pfn - min_low_pfn;
510#ifdef CONFIG_HIGHMEM
511 total += highend_pfn - highstart_pfn;
512#endif
513
514 return total << PAGE_SHIFT;
515}
516
517#ifdef CONFIG_KEXEC
518static void __init reserve_crashkernel(void)
519{
520 unsigned long long total_mem;
521 unsigned long long crash_size, crash_base;
522 int ret;
523
524 total_mem = get_total_mem();
525
526 ret = parse_crashkernel(boot_command_line, total_mem,
527 &crash_size, &crash_base);
528 if (ret == 0 && crash_size > 0) {
529 if (crash_base > 0) {
530 printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
531 "for crashkernel (System RAM: %ldMB)\n",
532 (unsigned long)(crash_size >> 20),
533 (unsigned long)(crash_base >> 20),
534 (unsigned long)(total_mem >> 20));
535
536 if (reserve_bootmem(crash_base, crash_size,
537 BOOTMEM_EXCLUSIVE) < 0) {
538 printk(KERN_INFO "crashkernel reservation "
539 "failed - memory is in use\n");
540 return;
541 }
542
543 crashk_res.start = crash_base;
544 crashk_res.end = crash_base + crash_size - 1;
545 } else
546 printk(KERN_INFO "crashkernel reservation failed - "
547 "you have to specify a base address\n");
548 }
549}
550#else
551static inline void __init reserve_crashkernel(void)
552{}
553#endif
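/*
 * Example: booting with "crashkernel=64M@16M" reserves 64MB starting
 * at physical address 16MB for the kdump capture kernel; see
 * Documentation/kdump/kdump.txt for the full syntax.
 */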
554
555#ifdef CONFIG_BLK_DEV_INITRD
556
557static bool do_relocate_initrd = false;
558
559static void __init reserve_initrd(void)
560{
561 unsigned long ramdisk_image = boot_params.hdr.ramdisk_image;
562 unsigned long ramdisk_size = boot_params.hdr.ramdisk_size;
563 unsigned long ramdisk_end = ramdisk_image + ramdisk_size;
564 unsigned long end_of_lowmem = max_low_pfn << PAGE_SHIFT;
565 unsigned long ramdisk_here;
566
567 initrd_start = 0;
568
569 if (!boot_params.hdr.type_of_loader ||
570 !ramdisk_image || !ramdisk_size)
571 return; /* No initrd provided by bootloader */
572
573 if (ramdisk_end < ramdisk_image) {
574 printk(KERN_ERR "initrd wraps around end of memory, "
575 "disabling initrd\n");
576 return;
577 }
578 if (ramdisk_size >= end_of_lowmem/2) {
579 printk(KERN_ERR "initrd too large to handle, "
580 "disabling initrd\n");
581 return;
582 }
583 if (ramdisk_end <= end_of_lowmem) {
584 /* All in lowmem, easy case */
585 reserve_bootmem(ramdisk_image, ramdisk_size, BOOTMEM_DEFAULT);
586 initrd_start = ramdisk_image + PAGE_OFFSET;
587 initrd_end = initrd_start+ramdisk_size;
588 return;
589 }
590
591 /* We need to move the initrd down into lowmem */
592 ramdisk_here = (end_of_lowmem - ramdisk_size) & PAGE_MASK;
593
594 /* Note: this includes all the lowmem currently occupied by
595 the initrd, we rely on that fact to keep the data intact. */
596 reserve_bootmem(ramdisk_here, ramdisk_size, BOOTMEM_DEFAULT);
597 initrd_start = ramdisk_here + PAGE_OFFSET;
598 initrd_end = initrd_start + ramdisk_size;
599
600 do_relocate_initrd = true;
601}
602
603#define MAX_MAP_CHUNK (NR_FIX_BTMAPS << PAGE_SHIFT)
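/*
 * early_ioremap() can map at most NR_FIX_BTMAPS fixmap pages at once,
 * so relocate_initrd() below copies the highmem part of the initrd in
 * chunks of at most MAX_MAP_CHUNK bytes.
 */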
604
605static void __init relocate_initrd(void)
606{
607 unsigned long ramdisk_image = boot_params.hdr.ramdisk_image;
608 unsigned long ramdisk_size = boot_params.hdr.ramdisk_size;
609 unsigned long end_of_lowmem = max_low_pfn << PAGE_SHIFT;
610 unsigned long ramdisk_here;
611 unsigned long slop, clen, mapaddr;
612 char *p, *q;
613
614 if (!do_relocate_initrd)
615 return;
616
617 ramdisk_here = initrd_start - PAGE_OFFSET;
618
619 q = (char *)initrd_start;
620
621 /* Copy any lowmem portion of the initrd */
622 if (ramdisk_image < end_of_lowmem) {
623 clen = end_of_lowmem - ramdisk_image;
624 p = (char *)__va(ramdisk_image);
625 memcpy(q, p, clen);
626 q += clen;
627 ramdisk_image += clen;
628 ramdisk_size -= clen;
629 }
630
631 /* Copy the highmem portion of the initrd */
632 while (ramdisk_size) {
633 slop = ramdisk_image & ~PAGE_MASK;
634 clen = ramdisk_size;
635 if (clen > MAX_MAP_CHUNK-slop)
636 clen = MAX_MAP_CHUNK-slop;
637 mapaddr = ramdisk_image & PAGE_MASK;
638 p = early_ioremap(mapaddr, clen+slop);
639 memcpy(q, p+slop, clen);
640 early_iounmap(p, clen+slop);
641 q += clen;
642 ramdisk_image += clen;
643 ramdisk_size -= clen;
644 }
645}
646
647#endif /* CONFIG_BLK_DEV_INITRD */
648
649void __init setup_bootmem_allocator(void)
650{
651 unsigned long bootmap_size;
652 /*
653 * Initialize the boot-time allocator (with low memory only):
654 */
655 bootmap_size = init_bootmem(min_low_pfn, max_low_pfn);
656
657 register_bootmem_low_pages(max_low_pfn);
658
659 /*
660 * Reserve the bootmem bitmap itself as well. We do this in two
661 * steps (first step was init_bootmem()) because this catches
662 * the (very unlikely) case of us accidentally initializing the
663 * bootmem allocator with an invalid RAM area.
664 */
665 reserve_bootmem(__pa_symbol(_text), (PFN_PHYS(min_low_pfn) +
666 bootmap_size + PAGE_SIZE-1) - __pa_symbol(_text),
667 BOOTMEM_DEFAULT);
668
669 /*
670 * reserve physical page 0 - it's a special BIOS page on many boxes,
671 * enabling clean reboots, SMP operation, laptop functions.
672 */
673 reserve_bootmem(0, PAGE_SIZE, BOOTMEM_DEFAULT);
674
675 /* reserve EBDA region */
676 reserve_ebda_region();
677
678#ifdef CONFIG_SMP
679 /*
680 * But first pinch a few for the stack/trampoline stuff
681 * FIXME: Don't need the extra page at 4K, but need to fix
682 * trampoline before removing it. (see the GDT stuff)
683 */
684 reserve_bootmem(PAGE_SIZE, PAGE_SIZE, BOOTMEM_DEFAULT);
685#endif
686#ifdef CONFIG_ACPI_SLEEP
687 /*
688 * Reserve low memory region for sleep support.
689 */
690 acpi_reserve_bootmem();
691#endif
692#ifdef CONFIG_X86_FIND_SMP_CONFIG
693 /*
694 * Find and reserve possible boot-time SMP configuration:
695 */
696 find_smp_config();
697#endif
698#ifdef CONFIG_BLK_DEV_INITRD
699 reserve_initrd();
700#endif
701 numa_kva_reserve();
702 reserve_crashkernel();
703
704 reserve_ibft_region();
705}
706
707/*
708 * The node 0 pgdat is initialized before all of these because
709 * it's needed for bootmem. node>0 pgdats have their virtual
710 * space allocated before the pagetables are in place to access
711 * them, so they can't be cleared then.
712 *
713 * This should all compile down to nothing when NUMA is off.
714 */
715static void __init remapped_pgdat_init(void)
716{
717 int nid;
718
719 for_each_online_node(nid) {
720 if (nid != 0)
721 memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
722 }
723}
724
725#ifdef CONFIG_MCA
726static void set_mca_bus(int x)
727{
728 MCA_bus = x;
729}
730#else
731static void set_mca_bus(int x) { }
732#endif
733
734/* Overridden in paravirt.c if CONFIG_PARAVIRT */
735char * __init __attribute__((weak)) memory_setup(void)
736{
737 return machine_specific_memory_setup();
738}
739
740#ifdef CONFIG_NUMA
741/*
 742 * One day, when everything in i386 and x86_64 is fully
 743 * integrated, this will not live here
744 */
745void *x86_cpu_to_node_map_early_ptr;
746int x86_cpu_to_node_map_init[NR_CPUS] = {
747 [0 ... NR_CPUS-1] = NUMA_NO_NODE
748};
749DEFINE_PER_CPU(int, x86_cpu_to_node_map) = NUMA_NO_NODE;
750#endif
751
752/*
753 * Determine if we were loaded by an EFI loader. If so, then we have also been
754 * passed the efi memmap, systab, etc., so we should use these data structures
755 * for initialization. Note, the efi init code path is determined by the
756 * global efi_enabled. This allows the same kernel image to be used on existing
757 * systems (with a traditional BIOS) as well as on EFI systems.
758 */
759void __init setup_arch(char **cmdline_p)
760{
761 unsigned long max_low_pfn;
762
763 memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
764 pre_setup_arch_hook();
765 early_cpu_init();
766 early_ioremap_init();
767
768#ifdef CONFIG_EFI
769 if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature,
770 "EL32", 4))
771 efi_enabled = 1;
772#endif
773
774 ROOT_DEV = old_decode_dev(boot_params.hdr.root_dev);
775 screen_info = boot_params.screen_info;
776 edid_info = boot_params.edid_info;
777 apm_info.bios = boot_params.apm_bios_info;
778 ist_info = boot_params.ist_info;
779 saved_video_mode = boot_params.hdr.vid_mode;
 780 if (boot_params.sys_desc_table.length != 0) {
781 set_mca_bus(boot_params.sys_desc_table.table[3] & 0x2);
782 machine_id = boot_params.sys_desc_table.table[0];
783 machine_submodel_id = boot_params.sys_desc_table.table[1];
784 BIOS_revision = boot_params.sys_desc_table.table[2];
785 }
786 bootloader_type = boot_params.hdr.type_of_loader;
787
788#ifdef CONFIG_BLK_DEV_RAM
789 rd_image_start = boot_params.hdr.ram_size & RAMDISK_IMAGE_START_MASK;
790 rd_prompt = ((boot_params.hdr.ram_size & RAMDISK_PROMPT_FLAG) != 0);
791 rd_doload = ((boot_params.hdr.ram_size & RAMDISK_LOAD_FLAG) != 0);
792#endif
793 ARCH_SETUP
794
795 printk(KERN_INFO "BIOS-provided physical RAM map:\n");
796 print_memory_map(memory_setup());
797
798 copy_edd();
799
800 if (!boot_params.hdr.root_flags)
801 root_mountflags &= ~MS_RDONLY;
802 init_mm.start_code = (unsigned long) _text;
803 init_mm.end_code = (unsigned long) _etext;
804 init_mm.end_data = (unsigned long) _edata;
805 init_mm.brk = init_pg_tables_end + PAGE_OFFSET;
806
807 code_resource.start = virt_to_phys(_text);
808 code_resource.end = virt_to_phys(_etext)-1;
809 data_resource.start = virt_to_phys(_etext);
810 data_resource.end = virt_to_phys(_edata)-1;
811 bss_resource.start = virt_to_phys(&__bss_start);
812 bss_resource.end = virt_to_phys(&__bss_stop)-1;
813
814 parse_early_param();
815
816 if (user_defined_memmap) {
817 printk(KERN_INFO "user-defined physical RAM map:\n");
818 print_memory_map("user");
819 }
820
821 strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
822 *cmdline_p = command_line;
823
824 if (efi_enabled)
825 efi_init();
826
827 /* update e820 for memory not covered by WB MTRRs */
828 propagate_e820_map();
829 mtrr_bp_init();
830 if (mtrr_trim_uncached_memory(max_pfn))
831 propagate_e820_map();
832
833 max_low_pfn = setup_memory();
834
835#ifdef CONFIG_KVM_CLOCK
836 kvmclock_init();
837#endif
838
839#ifdef CONFIG_VMI
840 /*
841 * Must be after max_low_pfn is determined, and before kernel
842 * pagetables are setup.
843 */
844 vmi_init();
845#endif
846 kvm_guest_init();
847
848 /*
849 * NOTE: before this point _nobody_ is allowed to allocate
850 * any memory using the bootmem allocator. Although the
 851 * allocator is now initialised, only the first 8MB of the kernel
 852 * virtual address space has been mapped. All allocations made before
 853 * paging_init() completes must use the alloc_bootmem_low_pages()
 854 * variant (which allocates DMA'able memory), and care must be taken
 855 * not to exceed the 8MB limit.
856 */
857
858#ifdef CONFIG_SMP
859 smp_alloc_memory(); /* AP processor realmode stacks in low memory*/
860#endif
861 paging_init();
862
863 /*
864 * NOTE: On x86-32, only from this point on, fixmaps are ready for use.
865 */
866
867#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
868 if (init_ohci1394_dma_early)
869 init_ohci1394_dma_on_all_controllers();
870#endif
871
872 remapped_pgdat_init();
873 sparse_init();
874 zone_sizes_init();
875
876 /*
877 * NOTE: at this point the bootmem allocator is fully available.
878 */
879
880#ifdef CONFIG_BLK_DEV_INITRD
881 relocate_initrd();
882#endif
883
884 paravirt_post_allocator_init();
885
886 dmi_scan_machine();
887
888 io_delay_init();
889
890#ifdef CONFIG_X86_SMP
891 /*
892 * setup to use the early static init tables during kernel startup
893 * X86_SMP will exclude sub-arches that don't deal well with it.
894 */
895 x86_cpu_to_apicid_early_ptr = (void *)x86_cpu_to_apicid_init;
896 x86_bios_cpu_apicid_early_ptr = (void *)x86_bios_cpu_apicid_init;
897#ifdef CONFIG_NUMA
898 x86_cpu_to_node_map_early_ptr = (void *)x86_cpu_to_node_map_init;
899#endif
900#endif
901
902#ifdef CONFIG_X86_GENERICARCH
903 generic_apic_probe();
904#endif
905
906#ifdef CONFIG_ACPI
907 /*
908 * Parse the ACPI tables for possible boot-time SMP configuration.
909 */
910 acpi_boot_table_init();
911#endif
912
913 early_quirks();
914
915#ifdef CONFIG_ACPI
916 acpi_boot_init();
917
918#if defined(CONFIG_SMP) && defined(CONFIG_X86_PC)
919 if (def_to_bigsmp)
920 printk(KERN_WARNING "More than 8 CPUs detected and "
921 "CONFIG_X86_PC cannot handle it.\nUse "
922 "CONFIG_X86_GENERICARCH or CONFIG_X86_BIGSMP.\n");
923#endif
924#endif
925#ifdef CONFIG_X86_LOCAL_APIC
926 if (smp_found_config)
927 get_smp_config();
928#endif
929
930 e820_register_memory();
931 e820_mark_nosave_regions();
932
933#ifdef CONFIG_VT
934#if defined(CONFIG_VGA_CONSOLE)
935 if (!efi_enabled || (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY))
936 conswitchp = &vga_con;
937#elif defined(CONFIG_DUMMY_CONSOLE)
938 conswitchp = &dummy_con;
939#endif
940#endif
941}
942
943/*
944 * Request address space for all standard resources
945 *
946 * This is called just before pcibios_init(), which is also a
947 * subsys_initcall, but is linked in later (in arch/i386/pci/common.c).
948 */
949static int __init request_standard_resources(void)
950{
951 int i;
952
953 printk(KERN_INFO "Setting up standard PCI resources\n");
954 init_iomem_resources(&code_resource, &data_resource, &bss_resource);
955
956 request_resource(&iomem_resource, &video_ram_resource);
957
958 /* request I/O space for devices used on all i[345]86 PCs */
959 for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++)
960 request_resource(&ioport_resource, &standard_io_resources[i]);
961 return 0;
962}
963
964subsys_initcall(request_standard_resources);
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
deleted file mode 100644
index 6dff1286ad8a..000000000000
--- a/arch/x86/kernel/setup_64.c
+++ /dev/null
@@ -1,1194 +0,0 @@
1/*
2 * Copyright (C) 1995 Linus Torvalds
3 */
4
5/*
6 * This file handles the architecture-dependent parts of initialization
7 */
8
9#include <linux/errno.h>
10#include <linux/sched.h>
11#include <linux/kernel.h>
12#include <linux/mm.h>
13#include <linux/stddef.h>
14#include <linux/unistd.h>
15#include <linux/ptrace.h>
16#include <linux/slab.h>
17#include <linux/user.h>
18#include <linux/screen_info.h>
19#include <linux/ioport.h>
20#include <linux/delay.h>
21#include <linux/init.h>
22#include <linux/initrd.h>
23#include <linux/highmem.h>
24#include <linux/bootmem.h>
25#include <linux/module.h>
26#include <asm/processor.h>
27#include <linux/console.h>
28#include <linux/seq_file.h>
29#include <linux/crash_dump.h>
30#include <linux/root_dev.h>
31#include <linux/pci.h>
32#include <asm/pci-direct.h>
33#include <linux/efi.h>
34#include <linux/acpi.h>
35#include <linux/kallsyms.h>
36#include <linux/edd.h>
37#include <linux/iscsi_ibft.h>
38#include <linux/mmzone.h>
39#include <linux/kexec.h>
40#include <linux/cpufreq.h>
41#include <linux/dmi.h>
42#include <linux/dma-mapping.h>
43#include <linux/ctype.h>
44#include <linux/sort.h>
45#include <linux/uaccess.h>
46#include <linux/init_ohci1394_dma.h>
47#include <linux/kvm_para.h>
48
49#include <asm/mtrr.h>
50#include <asm/uaccess.h>
51#include <asm/system.h>
52#include <asm/vsyscall.h>
53#include <asm/io.h>
54#include <asm/smp.h>
55#include <asm/msr.h>
56#include <asm/desc.h>
57#include <video/edid.h>
58#include <asm/e820.h>
59#include <asm/dma.h>
60#include <asm/gart.h>
61#include <asm/mpspec.h>
62#include <asm/mmu_context.h>
63#include <asm/proto.h>
64#include <asm/setup.h>
65#include <asm/numa.h>
66#include <asm/sections.h>
67#include <asm/dmi.h>
68#include <asm/cacheflush.h>
69#include <asm/mce.h>
70#include <asm/ds.h>
71#include <asm/topology.h>
72#include <asm/trampoline.h>
73#include <asm/pat.h>
74
75#include <mach_apic.h>
76#ifdef CONFIG_PARAVIRT
77#include <asm/paravirt.h>
78#else
79#define ARCH_SETUP
80#endif
81
82/*
83 * Machine setup..
84 */
85
86struct cpuinfo_x86 boot_cpu_data __read_mostly;
87EXPORT_SYMBOL(boot_cpu_data);
88
89__u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata;
90
91unsigned long mmu_cr4_features;
92
93/* Boot loader ID as an integer, for the benefit of proc_dointvec */
94int bootloader_type;
95
96unsigned long saved_video_mode;
97
98int force_mwait __cpuinitdata;
99
100/*
101 * Early DMI memory
102 */
103int dmi_alloc_index;
104char dmi_alloc_data[DMI_MAX_DATA];
105
106/*
107 * Setup options
108 */
109struct screen_info screen_info;
110EXPORT_SYMBOL(screen_info);
111struct sys_desc_table_struct {
112 unsigned short length;
113 unsigned char table[0];
114};
115
116struct edid_info edid_info;
117EXPORT_SYMBOL_GPL(edid_info);
118
119extern int root_mountflags;
120
121char __initdata command_line[COMMAND_LINE_SIZE];
122
123static struct resource standard_io_resources[] = {
124 { .name = "dma1", .start = 0x00, .end = 0x1f,
125 .flags = IORESOURCE_BUSY | IORESOURCE_IO },
126 { .name = "pic1", .start = 0x20, .end = 0x21,
127 .flags = IORESOURCE_BUSY | IORESOURCE_IO },
128 { .name = "timer0", .start = 0x40, .end = 0x43,
129 .flags = IORESOURCE_BUSY | IORESOURCE_IO },
130 { .name = "timer1", .start = 0x50, .end = 0x53,
131 .flags = IORESOURCE_BUSY | IORESOURCE_IO },
132 { .name = "keyboard", .start = 0x60, .end = 0x60,
133 .flags = IORESOURCE_BUSY | IORESOURCE_IO },
134 { .name = "keyboard", .start = 0x64, .end = 0x64,
135 .flags = IORESOURCE_BUSY | IORESOURCE_IO },
136 { .name = "dma page reg", .start = 0x80, .end = 0x8f,
137 .flags = IORESOURCE_BUSY | IORESOURCE_IO },
138 { .name = "pic2", .start = 0xa0, .end = 0xa1,
139 .flags = IORESOURCE_BUSY | IORESOURCE_IO },
140 { .name = "dma2", .start = 0xc0, .end = 0xdf,
141 .flags = IORESOURCE_BUSY | IORESOURCE_IO },
142 { .name = "fpu", .start = 0xf0, .end = 0xff,
143 .flags = IORESOURCE_BUSY | IORESOURCE_IO }
144};
145
146#define IORESOURCE_RAM (IORESOURCE_BUSY | IORESOURCE_MEM)
147
148static struct resource data_resource = {
149 .name = "Kernel data",
150 .start = 0,
151 .end = 0,
152 .flags = IORESOURCE_RAM,
153};
154static struct resource code_resource = {
155 .name = "Kernel code",
156 .start = 0,
157 .end = 0,
158 .flags = IORESOURCE_RAM,
159};
160static struct resource bss_resource = {
161 .name = "Kernel bss",
162 .start = 0,
163 .end = 0,
164 .flags = IORESOURCE_RAM,
165};
166
167static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c);
168
169#ifdef CONFIG_PROC_VMCORE
170/* elfcorehdr= specifies the location of the ELF core header
171 * stored by the crashed kernel. This option will be passed
172 * by kexec loader to the capture kernel.
173 */
174static int __init setup_elfcorehdr(char *arg)
175{
176 char *end;
177 if (!arg)
178 return -EINVAL;
179 elfcorehdr_addr = memparse(arg, &end);
180 return end > arg ? 0 : -EINVAL;
181}
182early_param("elfcorehdr", setup_elfcorehdr);
183#endif
184
185#ifndef CONFIG_NUMA
186static void __init
187contig_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
188{
189 unsigned long bootmap_size, bootmap;
190
191 bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT;
192 bootmap = find_e820_area(0, end_pfn<<PAGE_SHIFT, bootmap_size,
193 PAGE_SIZE);
194 if (bootmap == -1L)
195 panic("Cannot find bootmem map of size %ld\n", bootmap_size);
196 bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, end_pfn);
197 e820_register_active_regions(0, start_pfn, end_pfn);
198 free_bootmem_with_active_regions(0, end_pfn);
199 early_res_to_bootmem(0, end_pfn<<PAGE_SHIFT);
200 reserve_bootmem(bootmap, bootmap_size, BOOTMEM_DEFAULT);
201}
202#endif
203
204#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
205struct edd edd;
206#ifdef CONFIG_EDD_MODULE
207EXPORT_SYMBOL(edd);
208#endif
209/**
210 * copy_edd() - Copy the BIOS EDD information
211 * from boot_params into a safe place.
212 *
213 */
214static inline void copy_edd(void)
215{
216 memcpy(edd.mbr_signature, boot_params.edd_mbr_sig_buffer,
217 sizeof(edd.mbr_signature));
218 memcpy(edd.edd_info, boot_params.eddbuf, sizeof(edd.edd_info));
219 edd.mbr_signature_nr = boot_params.edd_mbr_sig_buf_entries;
220 edd.edd_info_nr = boot_params.eddbuf_entries;
221}
222#else
223static inline void copy_edd(void)
224{
225}
226#endif
227
228#ifdef CONFIG_KEXEC
229static void __init reserve_crashkernel(void)
230{
231 unsigned long long total_mem;
232 unsigned long long crash_size, crash_base;
233 int ret;
234
235 total_mem = ((unsigned long long)max_low_pfn - min_low_pfn) << PAGE_SHIFT;
236
237 ret = parse_crashkernel(boot_command_line, total_mem,
238 &crash_size, &crash_base);
239 if (ret == 0 && crash_size) {
240 if (crash_base <= 0) {
241 printk(KERN_INFO "crashkernel reservation failed - "
242 "you have to specify a base address\n");
243 return;
244 }
245
246 if (reserve_bootmem(crash_base, crash_size,
247 BOOTMEM_EXCLUSIVE) < 0) {
248 printk(KERN_INFO "crashkernel reservation failed - "
249 "memory is in use\n");
250 return;
251 }
252
253 printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
254 "for crashkernel (System RAM: %ldMB)\n",
255 (unsigned long)(crash_size >> 20),
256 (unsigned long)(crash_base >> 20),
257 (unsigned long)(total_mem >> 20));
258 crashk_res.start = crash_base;
259 crashk_res.end = crash_base + crash_size - 1;
260 insert_resource(&iomem_resource, &crashk_res);
261 }
262}
263#else
264static inline void __init reserve_crashkernel(void)
265{}
266#endif
267
268/* Overridden in paravirt.c if CONFIG_PARAVIRT */
269void __attribute__((weak)) __init memory_setup(void)
270{
271 machine_specific_memory_setup();
272}
273
274static void __init parse_setup_data(void)
275{
276 struct setup_data *data;
277 unsigned long pa_data;
278
279 if (boot_params.hdr.version < 0x0209)
280 return;
281 pa_data = boot_params.hdr.setup_data;
282 while (pa_data) {
283 data = early_ioremap(pa_data, PAGE_SIZE);
284 switch (data->type) {
285 default:
286 break;
287 }
288#ifndef CONFIG_DEBUG_BOOT_PARAMS
289 free_early(pa_data, pa_data+sizeof(*data)+data->len);
290#endif
291 pa_data = data->next;
292 early_iounmap(data, PAGE_SIZE);
293 }
294}
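/*
 * boot_params.hdr.setup_data (boot protocol 2.09 and later) holds the
 * physical address of a singly linked list of struct setup_data
 * blobs; the loop above walks the list via early_ioremap() and frees
 * each entry once it has been examined.
 */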
295
296#ifdef CONFIG_PCI_MMCONFIG
297extern void __cpuinit fam10h_check_enable_mmcfg(void);
298extern void __init check_enable_amd_mmconf_dmi(void);
299#else
300void __cpuinit fam10h_check_enable_mmcfg(void)
301{
302}
303void __init check_enable_amd_mmconf_dmi(void)
304{
305}
306#endif
307
308/*
309 * setup_arch - architecture-specific boot-time initializations
310 *
311 * Note: On x86_64, fixmaps are ready for use even before this is called.
312 */
313void __init setup_arch(char **cmdline_p)
314{
315 unsigned i;
316
317 printk(KERN_INFO "Command line: %s\n", boot_command_line);
318
319 ROOT_DEV = old_decode_dev(boot_params.hdr.root_dev);
320 screen_info = boot_params.screen_info;
321 edid_info = boot_params.edid_info;
322 saved_video_mode = boot_params.hdr.vid_mode;
323 bootloader_type = boot_params.hdr.type_of_loader;
324
325#ifdef CONFIG_BLK_DEV_RAM
326 rd_image_start = boot_params.hdr.ram_size & RAMDISK_IMAGE_START_MASK;
327 rd_prompt = ((boot_params.hdr.ram_size & RAMDISK_PROMPT_FLAG) != 0);
328 rd_doload = ((boot_params.hdr.ram_size & RAMDISK_LOAD_FLAG) != 0);
329#endif
330#ifdef CONFIG_EFI
331 if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature,
332 "EL64", 4))
333 efi_enabled = 1;
334#endif
335
336 ARCH_SETUP
337
338 memory_setup();
339 copy_edd();
340
341 if (!boot_params.hdr.root_flags)
342 root_mountflags &= ~MS_RDONLY;
343 init_mm.start_code = (unsigned long) &_text;
344 init_mm.end_code = (unsigned long) &_etext;
345 init_mm.end_data = (unsigned long) &_edata;
346 init_mm.brk = (unsigned long) &_end;
347
348 code_resource.start = virt_to_phys(&_text);
349 code_resource.end = virt_to_phys(&_etext)-1;
350 data_resource.start = virt_to_phys(&_etext);
351 data_resource.end = virt_to_phys(&_edata)-1;
352 bss_resource.start = virt_to_phys(&__bss_start);
353 bss_resource.end = virt_to_phys(&__bss_stop)-1;
354
355 early_identify_cpu(&boot_cpu_data);
356
357 strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
358 *cmdline_p = command_line;
359
360 parse_setup_data();
361
362 parse_early_param();
363
364#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
365 if (init_ohci1394_dma_early)
366 init_ohci1394_dma_on_all_controllers();
367#endif
368
369 finish_e820_parsing();
370
371 /* after parse_early_param, so could debug it */
372 insert_resource(&iomem_resource, &code_resource);
373 insert_resource(&iomem_resource, &data_resource);
374 insert_resource(&iomem_resource, &bss_resource);
375
376 early_gart_iommu_check();
377
378 e820_register_active_regions(0, 0, -1UL);
379 /*
380 * partially used pages are not usable - thus
381 * we are rounding upwards:
382 */
383 end_pfn = e820_end_of_ram();
384 /* update e820 for memory not covered by WB MTRRs */
385 mtrr_bp_init();
386 if (mtrr_trim_uncached_memory(end_pfn)) {
387 e820_register_active_regions(0, 0, -1UL);
388 end_pfn = e820_end_of_ram();
389 }
390
391 num_physpages = end_pfn;
392
393 check_efer();
394
395 max_pfn_mapped = init_memory_mapping(0, (max_pfn_mapped << PAGE_SHIFT));
396 if (efi_enabled)
397 efi_init();
398
399 vsmp_init();
400
401 dmi_scan_machine();
402
403 io_delay_init();
404
405#ifdef CONFIG_KVM_CLOCK
406 kvmclock_init();
407#endif
408
409#ifdef CONFIG_SMP
410 /* setup to use the early static init tables during kernel startup */
411 x86_cpu_to_apicid_early_ptr = (void *)x86_cpu_to_apicid_init;
412 x86_bios_cpu_apicid_early_ptr = (void *)x86_bios_cpu_apicid_init;
413#ifdef CONFIG_NUMA
414 x86_cpu_to_node_map_early_ptr = (void *)x86_cpu_to_node_map_init;
415#endif
416#endif
417
418#ifdef CONFIG_ACPI
419 /*
420 * Initialize the ACPI boot-time table parser (gets the RSDP and SDT).
421 * Call this early for SRAT node setup.
422 */
423 acpi_boot_table_init();
424#endif
425
426 /* How many end-of-memory variables you have, grandma! */
427 max_low_pfn = end_pfn;
428 max_pfn = end_pfn;
429 high_memory = (void *)__va(end_pfn * PAGE_SIZE - 1) + 1;
430
431 /* Remove active ranges so rediscovery with NUMA-awareness happens */
432 remove_all_active_ranges();
433
434#ifdef CONFIG_ACPI_NUMA
435 /*
436 * Parse SRAT to discover nodes.
437 */
438 acpi_numa_init();
439#endif
440
441#ifdef CONFIG_NUMA
442 numa_initmem_init(0, end_pfn);
443#else
444 contig_initmem_init(0, end_pfn);
445#endif
446
447 dma32_reserve_bootmem();
448
449#ifdef CONFIG_ACPI_SLEEP
450 /*
451 * Reserve low memory region for sleep support.
452 */
453 acpi_reserve_bootmem();
454#endif
455
456 if (efi_enabled)
457 efi_reserve_bootmem();
458
459 /*
460 * Find and reserve possible boot-time SMP configuration:
461 */
462 find_smp_config();
463#ifdef CONFIG_BLK_DEV_INITRD
464 if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) {
465 unsigned long ramdisk_image = boot_params.hdr.ramdisk_image;
466 unsigned long ramdisk_size = boot_params.hdr.ramdisk_size;
467 unsigned long ramdisk_end = ramdisk_image + ramdisk_size;
468 unsigned long end_of_mem = end_pfn << PAGE_SHIFT;
469
470 if (ramdisk_end <= end_of_mem) {
471 /*
 472 * No need to reserve again: this was already reserved early
 473 * in x86_64_start_kernel, and early_res_to_bootmem
 474 * converted that to a bootmem reservation
475 */
476 initrd_start = ramdisk_image + PAGE_OFFSET;
477 initrd_end = initrd_start+ramdisk_size;
478 } else {
479 free_bootmem(ramdisk_image, ramdisk_size);
480 printk(KERN_ERR "initrd extends beyond end of memory "
481 "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
482 ramdisk_end, end_of_mem);
483 initrd_start = 0;
484 }
485 }
486#endif
487 reserve_crashkernel();
488
489 reserve_ibft_region();
490
491 paging_init();
492 map_vsyscall();
493
494 early_quirks();
495
496#ifdef CONFIG_ACPI
497 /*
498 * Read APIC and some other early information from ACPI tables.
499 */
500 acpi_boot_init();
501#endif
502
503 init_cpu_to_node();
504
505 /*
506 * get boot-time SMP configuration:
507 */
508 if (smp_found_config)
509 get_smp_config();
510 init_apic_mappings();
511 ioapic_init_mappings();
512
513 kvm_guest_init();
514
515 /*
516 * We trust e820 completely. No explicit ROM probing in memory.
517 */
518 e820_reserve_resources();
519 e820_mark_nosave_regions();
520
521 /* request I/O space for devices used on all i[345]86 PCs */
522 for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++)
523 request_resource(&ioport_resource, &standard_io_resources[i]);
524
525 e820_setup_gap();
526
527#ifdef CONFIG_VT
528#if defined(CONFIG_VGA_CONSOLE)
529 if (!efi_enabled || (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY))
530 conswitchp = &vga_con;
531#elif defined(CONFIG_DUMMY_CONSOLE)
532 conswitchp = &dummy_con;
533#endif
534#endif
535
536 /* do this before identify_cpu for boot cpu */
537 check_enable_amd_mmconf_dmi();
538}
539
540static int __cpuinit get_model_name(struct cpuinfo_x86 *c)
541{
542 unsigned int *v;
543
544 if (c->extended_cpuid_level < 0x80000004)
545 return 0;
546
547 v = (unsigned int *) c->x86_model_id;
548 cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]);
549 cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]);
550 cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]);
551 c->x86_model_id[48] = 0;
552 return 1;
553}
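/*
 * CPUID leaves 0x80000002..0x80000004 together return the 48-byte
 * processor brand string, stored into c->x86_model_id above and
 * NUL-terminated.
 */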
554
555
556static void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
557{
558 unsigned int n, dummy, eax, ebx, ecx, edx;
559
560 n = c->extended_cpuid_level;
561
562 if (n >= 0x80000005) {
563 cpuid(0x80000005, &dummy, &ebx, &ecx, &edx);
564 printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), "
565 "D cache %dK (%d bytes/line)\n",
566 edx>>24, edx&0xFF, ecx>>24, ecx&0xFF);
567 c->x86_cache_size = (ecx>>24) + (edx>>24);
568 /* On K8 L1 TLB is inclusive, so don't count it */
569 c->x86_tlbsize = 0;
570 }
571
572 if (n >= 0x80000006) {
573 cpuid(0x80000006, &dummy, &ebx, &ecx, &edx);
574 ecx = cpuid_ecx(0x80000006);
575 c->x86_cache_size = ecx >> 16;
576 c->x86_tlbsize += ((ebx >> 16) & 0xfff) + (ebx & 0xfff);
577
578 printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n",
579 c->x86_cache_size, ecx & 0xFF);
580 }
581 if (n >= 0x80000008) {
582 cpuid(0x80000008, &eax, &dummy, &dummy, &dummy);
583 c->x86_virt_bits = (eax >> 8) & 0xff;
584 c->x86_phys_bits = eax & 0xff;
585 }
586}
587
588#ifdef CONFIG_NUMA
589static int __cpuinit nearby_node(int apicid)
590{
591 int i, node;
592
593 for (i = apicid - 1; i >= 0; i--) {
594 node = apicid_to_node[i];
595 if (node != NUMA_NO_NODE && node_online(node))
596 return node;
597 }
598 for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) {
599 node = apicid_to_node[i];
600 if (node != NUMA_NO_NODE && node_online(node))
601 return node;
602 }
603 return first_node(node_online_map); /* Shouldn't happen */
604}
605#endif
606
607/*
 608 * On an AMD dual-core setup the lower bits of the APIC id distinguish the cores.
 609 * Assumes the number of cores is a power of two.
610 */
611static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c)
612{
613#ifdef CONFIG_SMP
614 unsigned bits;
615#ifdef CONFIG_NUMA
616 int cpu = smp_processor_id();
617 int node = 0;
618 unsigned apicid = hard_smp_processor_id();
619#endif
620 bits = c->x86_coreid_bits;
621
622 /* Low order bits define the core id (index of core in socket) */
623 c->cpu_core_id = c->initial_apicid & ((1 << bits)-1);
624 /* Convert the initial APIC ID into the socket ID */
625 c->phys_proc_id = c->initial_apicid >> bits;
626
627#ifdef CONFIG_NUMA
628 node = c->phys_proc_id;
629 if (apicid_to_node[apicid] != NUMA_NO_NODE)
630 node = apicid_to_node[apicid];
631 if (!node_online(node)) {
632 /* Two possibilities here:
633 - The CPU is missing memory and no node was created.
634 In that case try picking one from a nearby CPU
635 - The APIC IDs differ from the HyperTransport node IDs
636 which the K8 northbridge parsing fills in.
637 Assume they are all increased by a constant offset,
638 but in the same order as the HT nodeids.
639 If that doesn't result in a usable node fall back to the
640 path for the previous case. */
641
642 int ht_nodeid = c->initial_apicid;
643
644 if (ht_nodeid >= 0 &&
645 apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
646 node = apicid_to_node[ht_nodeid];
647 /* Pick a nearby node */
648 if (!node_online(node))
649 node = nearby_node(apicid);
650 }
651 numa_set_node(cpu, node);
652
653 printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node);
654#endif
655#endif
656}
657
658static void __cpuinit early_init_amd_mc(struct cpuinfo_x86 *c)
659{
660#ifdef CONFIG_SMP
661 unsigned bits, ecx;
662
663 /* Multi core CPU? */
664 if (c->extended_cpuid_level < 0x80000008)
665 return;
666
667 ecx = cpuid_ecx(0x80000008);
668
669 c->x86_max_cores = (ecx & 0xff) + 1;
670
 671 /* Did the CPU tell us the core id bit shift? */
672 bits = (ecx >> 12) & 0xF;
673
674 /* Otherwise recompute */
675 if (bits == 0) {
676 while ((1 << bits) < c->x86_max_cores)
677 bits++;
678 }
679
680 c->x86_coreid_bits = bits;
681
682#endif
683}
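/*
 * The shift comes from CPUID 0x80000008 ECX[15:12] (ApicIdCoreIdSize);
 * CPUs that report 0 there get the value recomputed from the core
 * count instead.
 */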
684
685#define ENABLE_C1E_MASK 0x18000000
686#define CPUID_PROCESSOR_SIGNATURE 1
687#define CPUID_XFAM 0x0ff00000
688#define CPUID_XFAM_K8 0x00000000
689#define CPUID_XFAM_10H 0x00100000
690#define CPUID_XFAM_11H 0x00200000
691#define CPUID_XMOD 0x000f0000
692#define CPUID_XMOD_REV_F 0x00040000
693
694/* AMD systems with C1E don't have a working lAPIC timer. Check for that. */
695static __cpuinit int amd_apic_timer_broken(void)
696{
697 u32 lo, hi, eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE);
698
699 switch (eax & CPUID_XFAM) {
700 case CPUID_XFAM_K8:
701 if ((eax & CPUID_XMOD) < CPUID_XMOD_REV_F)
702 break;
703 case CPUID_XFAM_10H:
704 case CPUID_XFAM_11H:
705 rdmsr(MSR_K8_ENABLE_C1E, lo, hi);
706 if (lo & ENABLE_C1E_MASK)
707 return 1;
708 break;
709 default:
710 /* err on the side of caution */
711 return 1;
712 }
713 return 0;
714}
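/*
 * With C1E enabled the local APIC timer stops in deep C-states, so
 * init_amd() uses this check to set disable_apic_timer and fall back
 * to another clock event source.
 */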
715
716static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
717{
718 early_init_amd_mc(c);
719
720 /* c->x86_power is 8000_0007 edx. Bit 8 is constant TSC */
721 if (c->x86_power & (1<<8))
722 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
723}
724
725static void __cpuinit init_amd(struct cpuinfo_x86 *c)
726{
727 unsigned level;
728
729#ifdef CONFIG_SMP
730 unsigned long value;
731
732 /*
733 * Disable TLB flush filter by setting HWCR.FFDIS on K8
734 * bit 6 of msr C001_0015
735 *
736 * Errata 63 for SH-B3 steppings
737 * Errata 122 for all steppings (F+ have it disabled by default)
738 */
739 if (c->x86 == 15) {
740 rdmsrl(MSR_K8_HWCR, value);
741 value |= 1 << 6;
742 wrmsrl(MSR_K8_HWCR, value);
743 }
744#endif
745
 746 /* Bit 31 in normal CPUID is used for a nonstandard 3DNow ID;
 747 3DNow is identified by bit 31 in extended CPUID (1*32+31) anyway */
748 clear_cpu_cap(c, 0*32+31);
749
750 /* On C+ stepping K8 rep microcode works well for copy/memset */
751 level = cpuid_eax(1);
752 if (c->x86 == 15 && ((level >= 0x0f48 && level < 0x0f50) ||
753 level >= 0x0f58))
754 set_cpu_cap(c, X86_FEATURE_REP_GOOD);
755 if (c->x86 == 0x10 || c->x86 == 0x11)
756 set_cpu_cap(c, X86_FEATURE_REP_GOOD);
757
758 /* Enable workaround for FXSAVE leak */
759 if (c->x86 >= 6)
760 set_cpu_cap(c, X86_FEATURE_FXSAVE_LEAK);
761
762 level = get_model_name(c);
763 if (!level) {
764 switch (c->x86) {
765 case 15:
 766 /* Should distinguish models here, but this is only
 767 a fallback anyway. */
768 strcpy(c->x86_model_id, "Hammer");
769 break;
770 }
771 }
772 display_cacheinfo(c);
773
774 /* Multi core CPU? */
775 if (c->extended_cpuid_level >= 0x80000008)
776 amd_detect_cmp(c);
777
778 if (c->extended_cpuid_level >= 0x80000006 &&
779 (cpuid_edx(0x80000006) & 0xf000))
780 num_cache_leaves = 4;
781 else
782 num_cache_leaves = 3;
783
784 if (c->x86 == 0xf || c->x86 == 0x10 || c->x86 == 0x11)
785 set_cpu_cap(c, X86_FEATURE_K8);
786
787 /* MFENCE stops RDTSC speculation */
788 set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
789
790 if (c->x86 == 0x10)
791 fam10h_check_enable_mmcfg();
792
793 if (amd_apic_timer_broken())
794 disable_apic_timer = 1;
795
796 if (c == &boot_cpu_data && c->x86 >= 0xf && c->x86 <= 0x11) {
797 unsigned long long tseg;
798
799 /*
800 * Split up direct mapping around the TSEG SMM area.
 801		 * Don't do it for gbpages because there seems to be very
 802		 * little benefit in doing so.
803 */
804 if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg) &&
805 (tseg >> PMD_SHIFT) < (max_pfn_mapped >> (PMD_SHIFT-PAGE_SHIFT)))
806 set_memory_4k((unsigned long)__va(tseg), 1);
807 }
808}
809
810void __cpuinit detect_ht(struct cpuinfo_x86 *c)
811{
812#ifdef CONFIG_SMP
813 u32 eax, ebx, ecx, edx;
814 int index_msb, core_bits;
815
816 cpuid(1, &eax, &ebx, &ecx, &edx);
817
818
819 if (!cpu_has(c, X86_FEATURE_HT))
820 return;
821 if (cpu_has(c, X86_FEATURE_CMP_LEGACY))
822 goto out;
823
824 smp_num_siblings = (ebx & 0xff0000) >> 16;
825
826 if (smp_num_siblings == 1) {
827 printk(KERN_INFO "CPU: Hyper-Threading is disabled\n");
828 } else if (smp_num_siblings > 1) {
829
830 if (smp_num_siblings > NR_CPUS) {
831 printk(KERN_WARNING "CPU: Unsupported number of "
 832				"siblings %d\n", smp_num_siblings);
833 smp_num_siblings = 1;
834 return;
835 }
836
837 index_msb = get_count_order(smp_num_siblings);
838 c->phys_proc_id = phys_pkg_id(index_msb);
839
840 smp_num_siblings = smp_num_siblings / c->x86_max_cores;
841
842 index_msb = get_count_order(smp_num_siblings);
843
844 core_bits = get_count_order(c->x86_max_cores);
845
846 c->cpu_core_id = phys_pkg_id(index_msb) &
847 ((1 << core_bits) - 1);
848 }
849out:
850 if ((c->x86_max_cores * smp_num_siblings) > 1) {
851 printk(KERN_INFO "CPU: Physical Processor ID: %d\n",
852 c->phys_proc_id);
853 printk(KERN_INFO "CPU: Processor Core ID: %d\n",
854 c->cpu_core_id);
855 }
856
857#endif
858}
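
The index_msb/core_bits arithmetic above splits the initial APIC ID into package and core fields (phys_pkg_id() reduces to a right shift of the APIC ID on x86_64). A stand-alone sketch with assumed values, where count_order() mirrors get_count_order(); illustration only:

#include <stdio.h>

/* Mirrors get_count_order(): smallest order with 1 << order >= n. */
static int count_order(unsigned int n)
{
	int order = 0;

	while ((1u << order) < n)
		order++;
	return order;
}

int main(void)
{
	unsigned int apicid = 0x7;	/* assumed initial APIC ID */
	int siblings = 4, cores = 2;	/* 2 cores x 2 threads */
	int index_msb = count_order(siblings);		 /* 2 */
	int core_bits = count_order(cores);		 /* 1 */
	int thread_bits = count_order(siblings / cores); /* 1 */

	printf("package %u core %u\n",
	       apicid >> index_msb,				  /* 1 */
	       (apicid >> thread_bits) & ((1 << core_bits) - 1)); /* 1 */
	return 0;
}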
859
860/*
861 * find out the number of processor cores on the die
862 */
863static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c)
864{
865 unsigned int eax, t;
866
867 if (c->cpuid_level < 4)
868 return 1;
869
870 cpuid_count(4, 0, &eax, &t, &t, &t);
871
872 if (eax & 0x1f)
873 return ((eax >> 26) + 1);
874 else
875 return 1;
876}
877
878static void __cpuinit srat_detect_node(void)
879{
880#ifdef CONFIG_NUMA
881 unsigned node;
882 int cpu = smp_processor_id();
883 int apicid = hard_smp_processor_id();
884
 885	/* For now, don't do the funky fallback heuristics that the
 886	   AMD version employs. */
887 node = apicid_to_node[apicid];
888 if (node == NUMA_NO_NODE || !node_online(node))
889 node = first_node(node_online_map);
890 numa_set_node(cpu, node);
891
892 printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node);
893#endif
894}
895
896static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
897{
898 if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
899 (c->x86 == 0x6 && c->x86_model >= 0x0e))
900 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
901}
902
903static void __cpuinit init_intel(struct cpuinfo_x86 *c)
904{
905 /* Cache sizes */
906 unsigned n;
907
908 init_intel_cacheinfo(c);
909 if (c->cpuid_level > 9) {
910 unsigned eax = cpuid_eax(10);
911 /* Check for version and the number of counters */
912 if ((eax & 0xff) && (((eax>>8) & 0xff) > 1))
913 set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON);
914 }
915
916 if (cpu_has_ds) {
917 unsigned int l1, l2;
918 rdmsr(MSR_IA32_MISC_ENABLE, l1, l2);
919 if (!(l1 & (1<<11)))
920 set_cpu_cap(c, X86_FEATURE_BTS);
921 if (!(l1 & (1<<12)))
922 set_cpu_cap(c, X86_FEATURE_PEBS);
923 }
924
925
926 if (cpu_has_bts)
927 ds_init_intel(c);
928
929 n = c->extended_cpuid_level;
930 if (n >= 0x80000008) {
931 unsigned eax = cpuid_eax(0x80000008);
932 c->x86_virt_bits = (eax >> 8) & 0xff;
933 c->x86_phys_bits = eax & 0xff;
934 /* CPUID workaround for Intel 0F34 CPU */
935 if (c->x86_vendor == X86_VENDOR_INTEL &&
936 c->x86 == 0xF && c->x86_model == 0x3 &&
937 c->x86_mask == 0x4)
938 c->x86_phys_bits = 36;
939 }
940
941 if (c->x86 == 15)
942 c->x86_cache_alignment = c->x86_clflush_size * 2;
943 if (c->x86 == 6)
944 set_cpu_cap(c, X86_FEATURE_REP_GOOD);
945 set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
946 c->x86_max_cores = intel_num_cpu_cores(c);
947
948 srat_detect_node();
949}
950
951static void __cpuinit early_init_centaur(struct cpuinfo_x86 *c)
952{
953 if (c->x86 == 0x6 && c->x86_model >= 0xf)
954 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
955}
956
957static void __cpuinit init_centaur(struct cpuinfo_x86 *c)
958{
959 /* Cache sizes */
960 unsigned n;
961
962 n = c->extended_cpuid_level;
963 if (n >= 0x80000008) {
964 unsigned eax = cpuid_eax(0x80000008);
965 c->x86_virt_bits = (eax >> 8) & 0xff;
966 c->x86_phys_bits = eax & 0xff;
967 }
968
969 if (c->x86 == 0x6 && c->x86_model >= 0xf) {
970 c->x86_cache_alignment = c->x86_clflush_size * 2;
971 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
972 set_cpu_cap(c, X86_FEATURE_REP_GOOD);
973 }
974 set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
975}
976
977static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c)
978{
979 char *v = c->x86_vendor_id;
980
981 if (!strcmp(v, "AuthenticAMD"))
982 c->x86_vendor = X86_VENDOR_AMD;
983 else if (!strcmp(v, "GenuineIntel"))
984 c->x86_vendor = X86_VENDOR_INTEL;
985 else if (!strcmp(v, "CentaurHauls"))
986 c->x86_vendor = X86_VENDOR_CENTAUR;
987 else
988 c->x86_vendor = X86_VENDOR_UNKNOWN;
989}
990
 991/* Do some early cpuid on the boot CPU to get some parameters that are
992 needed before check_bugs. Everything advanced is in identify_cpu
993 below. */
994static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c)
995{
996 u32 tfms, xlvl;
997
998 c->loops_per_jiffy = loops_per_jiffy;
999 c->x86_cache_size = -1;
1000 c->x86_vendor = X86_VENDOR_UNKNOWN;
1001 c->x86_model = c->x86_mask = 0; /* So far unknown... */
1002 c->x86_vendor_id[0] = '\0'; /* Unset */
1003 c->x86_model_id[0] = '\0'; /* Unset */
1004 c->x86_clflush_size = 64;
1005 c->x86_cache_alignment = c->x86_clflush_size;
1006 c->x86_max_cores = 1;
1007 c->x86_coreid_bits = 0;
1008 c->extended_cpuid_level = 0;
1009 memset(&c->x86_capability, 0, sizeof c->x86_capability);
1010
1011 /* Get vendor name */
1012 cpuid(0x00000000, (unsigned int *)&c->cpuid_level,
1013 (unsigned int *)&c->x86_vendor_id[0],
1014 (unsigned int *)&c->x86_vendor_id[8],
1015 (unsigned int *)&c->x86_vendor_id[4]);
1016
1017 get_cpu_vendor(c);
1018
1019 /* Initialize the standard set of capabilities */
1020	/* Note that the vendor-specific code below might override these */
1021
1022 /* Intel-defined flags: level 0x00000001 */
1023 if (c->cpuid_level >= 0x00000001) {
1024 __u32 misc;
1025 cpuid(0x00000001, &tfms, &misc, &c->x86_capability[4],
1026 &c->x86_capability[0]);
1027 c->x86 = (tfms >> 8) & 0xf;
1028 c->x86_model = (tfms >> 4) & 0xf;
1029 c->x86_mask = tfms & 0xf;
1030 if (c->x86 == 0xf)
1031 c->x86 += (tfms >> 20) & 0xff;
1032 if (c->x86 >= 0x6)
1033 c->x86_model += ((tfms >> 16) & 0xF) << 4;
1034 if (test_cpu_cap(c, X86_FEATURE_CLFLSH))
1035 c->x86_clflush_size = ((misc >> 8) & 0xff) * 8;
1036 } else {
1037 /* Have CPUID level 0 only - unheard of */
1038 c->x86 = 4;
1039 }
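	/*
	 * Worked example (assumed signature, illustration only):
	 * tfms = 0x00040f12 gives family 0xf, model 0x1, stepping 0x2;
	 * because the family is 0xf, extended family 0x0 is added and
	 * extended model 0x4 is folded in: x86 = 0xf, x86_model = 0x41.
	 */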
1040
1041 c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xff;
1042#ifdef CONFIG_SMP
1043 c->phys_proc_id = c->initial_apicid;
1044#endif
1045 /* AMD-defined flags: level 0x80000001 */
1046 xlvl = cpuid_eax(0x80000000);
1047 c->extended_cpuid_level = xlvl;
1048 if ((xlvl & 0xffff0000) == 0x80000000) {
1049 if (xlvl >= 0x80000001) {
1050 c->x86_capability[1] = cpuid_edx(0x80000001);
1051 c->x86_capability[6] = cpuid_ecx(0x80000001);
1052 }
1053 if (xlvl >= 0x80000004)
1054 get_model_name(c); /* Default name */
1055 }
1056
1057 /* Transmeta-defined flags: level 0x80860001 */
1058 xlvl = cpuid_eax(0x80860000);
1059 if ((xlvl & 0xffff0000) == 0x80860000) {
1060		/* Don't set x86_cpuid_level here for now, to avoid confusion. */
1061 if (xlvl >= 0x80860001)
1062 c->x86_capability[2] = cpuid_edx(0x80860001);
1063 }
1064
1065 c->extended_cpuid_level = cpuid_eax(0x80000000);
1066 if (c->extended_cpuid_level >= 0x80000007)
1067 c->x86_power = cpuid_edx(0x80000007);
1068
1069 switch (c->x86_vendor) {
1070 case X86_VENDOR_AMD:
1071 early_init_amd(c);
1072 break;
1073 case X86_VENDOR_INTEL:
1074 early_init_intel(c);
1075 break;
1076 case X86_VENDOR_CENTAUR:
1077 early_init_centaur(c);
1078 break;
1079 }
1080
1081 validate_pat_support(c);
1082}
1083
1084/*
1085 * This does the hard work of actually picking apart the CPU stuff...
1086 */
1087void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
1088{
1089 int i;
1090
1091 early_identify_cpu(c);
1092
1093 init_scattered_cpuid_features(c);
1094
1095 c->apicid = phys_pkg_id(0);
1096
1097 /*
1098 * Vendor-specific initialization. In this section we
1099	 * canonicalize the feature flags: if a CPU supports features
1100	 * that CPUID does not report, if CPUID claims incorrect
1101	 * flags, or if there are other bugs,
1102 * we handle them here.
1103 *
1104 * At the end of this section, c->x86_capability better
1105 * indicate the features this CPU genuinely supports!
1106 */
1107 switch (c->x86_vendor) {
1108 case X86_VENDOR_AMD:
1109 init_amd(c);
1110 break;
1111
1112 case X86_VENDOR_INTEL:
1113 init_intel(c);
1114 break;
1115
1116 case X86_VENDOR_CENTAUR:
1117 init_centaur(c);
1118 break;
1119
1120 case X86_VENDOR_UNKNOWN:
1121 default:
1122 display_cacheinfo(c);
1123 break;
1124 }
1125
1126 detect_ht(c);
1127
1128 /*
1129 * On SMP, boot_cpu_data holds the common feature set between
1130 * all CPUs; so make sure that we indicate which features are
1131 * common between the CPUs. The first time this routine gets
1132 * executed, c == &boot_cpu_data.
1133 */
1134 if (c != &boot_cpu_data) {
1135 /* AND the already accumulated flags with these */
1136 for (i = 0; i < NCAPINTS; i++)
1137 boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
1138 }
1139
1140	/* Clear all flags overridden by options */
1141 for (i = 0; i < NCAPINTS; i++)
1142 c->x86_capability[i] &= ~cleared_cpu_caps[i];
1143
1144#ifdef CONFIG_X86_MCE
1145 mcheck_init(c);
1146#endif
1147 select_idle_routine(c);
1148
1149#ifdef CONFIG_NUMA
1150 numa_add_cpu(smp_processor_id());
1151#endif
1152
1153}
1154
1155void __cpuinit identify_boot_cpu(void)
1156{
1157 identify_cpu(&boot_cpu_data);
1158}
1159
1160void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
1161{
1162 BUG_ON(c == &boot_cpu_data);
1163 identify_cpu(c);
1164 mtrr_ap_init();
1165}
1166
1167static __init int setup_noclflush(char *arg)
1168{
1169 setup_clear_cpu_cap(X86_FEATURE_CLFLSH);
1170 return 1;
1171}
1172__setup("noclflush", setup_noclflush);
1173
1174void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
1175{
1176 if (c->x86_model_id[0])
1177 printk(KERN_CONT "%s", c->x86_model_id);
1178
1179 if (c->x86_mask || c->cpuid_level >= 0)
1180 printk(KERN_CONT " stepping %02x\n", c->x86_mask);
1181 else
1182 printk(KERN_CONT "\n");
1183}
1184
1185static __init int setup_disablecpuid(char *arg)
1186{
1187 int bit;
1188 if (get_option(&arg, &bit) && bit < NCAPINTS*32)
1189 setup_clear_cpu_cap(bit);
1190 else
1191 return 0;
1192 return 1;
1193}
1194__setup("clearcpuid=", setup_disablecpuid);
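
As a usage note: a bit N given to clearcpuid= addresses word N/32, bit N%32 of x86_capability, so (assumed example) clearcpuid=154 clears bit 26 of word 4 via cleared_cpu_caps for every CPU identified afterwards.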
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
new file mode 100644
index 000000000000..5fc310f746fc
--- /dev/null
+++ b/arch/x86/kernel/setup_percpu.c
@@ -0,0 +1,399 @@
1#include <linux/kernel.h>
2#include <linux/module.h>
3#include <linux/init.h>
4#include <linux/bootmem.h>
5#include <linux/percpu.h>
6#include <linux/kexec.h>
7#include <linux/crash_dump.h>
8#include <asm/smp.h>
9#include <asm/percpu.h>
10#include <asm/sections.h>
11#include <asm/processor.h>
12#include <asm/setup.h>
13#include <asm/topology.h>
14#include <asm/mpspec.h>
15#include <asm/apicdef.h>
16#include <asm/highmem.h>
17
18#ifdef CONFIG_X86_LOCAL_APIC
19unsigned int num_processors;
20unsigned disabled_cpus __cpuinitdata;
21/* Processor that is doing the boot up */
22unsigned int boot_cpu_physical_apicid = -1U;
23unsigned int max_physical_apicid;
24EXPORT_SYMBOL(boot_cpu_physical_apicid);
25
26/* Bitmask of physically existing CPUs */
27physid_mask_t phys_cpu_present_map;
28#endif
29
30/* map cpu index to physical APIC ID */
31DEFINE_EARLY_PER_CPU(u16, x86_cpu_to_apicid, BAD_APICID);
32DEFINE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid, BAD_APICID);
33EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid);
34EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
35
36#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
37#define X86_64_NUMA 1
38
39/* map cpu index to node index */
40DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE);
41EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map);
42
43/* which logical CPUs are on which nodes */
44cpumask_t *node_to_cpumask_map;
45EXPORT_SYMBOL(node_to_cpumask_map);
46
47/* setup node_to_cpumask_map */
48static void __init setup_node_to_cpumask_map(void);
49
50#else
51static inline void setup_node_to_cpumask_map(void) { }
52#endif
53
54#if defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) && defined(CONFIG_X86_SMP)
55/*
56 * Copy data used in early init routines from the initial arrays to the
57 * per cpu data areas. These arrays then become expendable and the
 58 * *_early_ptr's are zeroed, indicating that the static arrays are gone.
59 */
60static void __init setup_per_cpu_maps(void)
61{
62 int cpu;
63
64 for_each_possible_cpu(cpu) {
65 per_cpu(x86_cpu_to_apicid, cpu) =
66 early_per_cpu_map(x86_cpu_to_apicid, cpu);
67 per_cpu(x86_bios_cpu_apicid, cpu) =
68 early_per_cpu_map(x86_bios_cpu_apicid, cpu);
69#ifdef X86_64_NUMA
70 per_cpu(x86_cpu_to_node_map, cpu) =
71 early_per_cpu_map(x86_cpu_to_node_map, cpu);
72#endif
73 }
74
75 /* indicate the early static arrays will soon be gone */
76 early_per_cpu_ptr(x86_cpu_to_apicid) = NULL;
77 early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL;
78#ifdef X86_64_NUMA
79 early_per_cpu_ptr(x86_cpu_to_node_map) = NULL;
80#endif
81}
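
The early_per_cpu() accessors consult the boot-time static array while the corresponding *_early_ptr is non-NULL and the per-cpu copy afterwards; a minimal sketch of that two-phase pattern (hypothetical lookup_apicid() helper, illustration only):

/* Hypothetical helper mirroring early_per_cpu(): before
 * setup_per_cpu_maps() runs, read the boot-time array; afterwards
 * the early pointer is NULL and the per-cpu copy is authoritative. */
static u16 lookup_apicid(int cpu)
{
	if (early_per_cpu_ptr(x86_cpu_to_apicid))
		return early_per_cpu_ptr(x86_cpu_to_apicid)[cpu];
	return per_cpu(x86_cpu_to_apicid, cpu);
}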
82
83#ifdef CONFIG_HAVE_CPUMASK_OF_CPU_MAP
84cpumask_t *cpumask_of_cpu_map __read_mostly;
85EXPORT_SYMBOL(cpumask_of_cpu_map);
86
87/* requires nr_cpu_ids to be initialized */
88static void __init setup_cpumask_of_cpu(void)
89{
90 int i;
91
92 /* alloc_bootmem zeroes memory */
93 cpumask_of_cpu_map = alloc_bootmem_low(sizeof(cpumask_t) * nr_cpu_ids);
94 for (i = 0; i < nr_cpu_ids; i++)
95 cpu_set(i, cpumask_of_cpu_map[i]);
96}
97#else
98static inline void setup_cpumask_of_cpu(void) { }
99#endif
100
101#ifdef CONFIG_X86_32
102/*
103 * Great future not-so-futuristic plan: make i386 and x86_64 do it
104 * the same way
105 */
106unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
107EXPORT_SYMBOL(__per_cpu_offset);
108static inline void setup_cpu_pda_map(void) { }
109
110#elif !defined(CONFIG_SMP)
111static inline void setup_cpu_pda_map(void) { }
112
113#else /* CONFIG_SMP && CONFIG_X86_64 */
114
115/*
116 * Allocate cpu_pda pointer table and array via alloc_bootmem.
117 */
118static void __init setup_cpu_pda_map(void)
119{
120 char *pda;
121 struct x8664_pda **new_cpu_pda;
122 unsigned long size;
123 int cpu;
124
125 size = roundup(sizeof(struct x8664_pda), cache_line_size());
126
127 /* allocate cpu_pda array and pointer table */
128 {
129 unsigned long tsize = nr_cpu_ids * sizeof(void *);
130 unsigned long asize = size * (nr_cpu_ids - 1);
131
132 tsize = roundup(tsize, cache_line_size());
133 new_cpu_pda = alloc_bootmem(tsize + asize);
134 pda = (char *)new_cpu_pda + tsize;
135 }
136
137 /* initialize pointer table to static pda's */
138 for_each_possible_cpu(cpu) {
139 if (cpu == 0) {
140 /* leave boot cpu pda in place */
141 new_cpu_pda[0] = cpu_pda(0);
142 continue;
143 }
144 new_cpu_pda[cpu] = (struct x8664_pda *)pda;
145 new_cpu_pda[cpu]->in_bootmem = 1;
146 pda += size;
147 }
148
149 /* point to new pointer table */
150 _cpu_pda = new_cpu_pda;
151}
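
Schematically, the single bootmem allocation above places the pointer table first and the pda array behind it (a sketch, assuming n = nr_cpu_ids):

/*
 *   new_cpu_pda[0..n-1]   pointer table, rounded up to a cache line
 *   pda for cpu 1         'size' bytes each; cpu 0 keeps its static
 *   ...                   boot pda, so only n - 1 pdas are allocated
 *   pda for cpu n-1
 */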
152#endif
153
154/*
155 * Great future plan:
156 * Declare PDA itself and support (irqstack,tss,pgd) as per cpu data.
157 * Always point %gs to its beginning
158 */
159void __init setup_per_cpu_areas(void)
160{
161 ssize_t size = PERCPU_ENOUGH_ROOM;
162 char *ptr;
163 int cpu;
164
165 /* Setup cpu_pda map */
166 setup_cpu_pda_map();
167
168 /* Copy section for each CPU (we discard the original) */
169 size = PERCPU_ENOUGH_ROOM;
170 printk(KERN_INFO "PERCPU: Allocating %zd bytes of per cpu data\n",
171 size);
172
173 for_each_possible_cpu(cpu) {
174#ifndef CONFIG_NEED_MULTIPLE_NODES
175 ptr = alloc_bootmem_pages(size);
176#else
177 int node = early_cpu_to_node(cpu);
178 if (!node_online(node) || !NODE_DATA(node)) {
179 ptr = alloc_bootmem_pages(size);
180 printk(KERN_INFO
181 "cpu %d has no node %d or node-local memory\n",
182 cpu, node);
183 }
184 else
185 ptr = alloc_bootmem_pages_node(NODE_DATA(node), size);
186#endif
187 per_cpu_offset(cpu) = ptr - __per_cpu_start;
188 memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
189
190 }
191
192 printk(KERN_DEBUG "NR_CPUS: %d, nr_cpu_ids: %d, nr_node_ids %d\n",
193 NR_CPUS, nr_cpu_ids, nr_node_ids);
194
195 /* Setup percpu data maps */
196 setup_per_cpu_maps();
197
198 /* Setup node to cpumask map */
199 setup_node_to_cpumask_map();
200
201 /* Setup cpumask_of_cpu map */
202 setup_cpumask_of_cpu();
203}
204
205#endif
206
207#ifdef X86_64_NUMA
208
209/*
210 * Allocate node_to_cpumask_map based on number of available nodes
211 * Requires node_possible_map to be valid.
212 *
213 * Note: node_to_cpumask() is not valid until after this is done.
214 */
215static void __init setup_node_to_cpumask_map(void)
216{
217 unsigned int node, num = 0;
218 cpumask_t *map;
219
220 /* setup nr_node_ids if not done yet */
221 if (nr_node_ids == MAX_NUMNODES) {
222 for_each_node_mask(node, node_possible_map)
223 num = node;
224 nr_node_ids = num + 1;
225 }
226
227 /* allocate the map */
228 map = alloc_bootmem_low(nr_node_ids * sizeof(cpumask_t));
229
230 Dprintk(KERN_DEBUG "Node to cpumask map at %p for %d nodes\n",
231 map, nr_node_ids);
232
233 /* node_to_cpumask() will now work */
234 node_to_cpumask_map = map;
235}
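
For instance, under an assumed sparse node map:

/*
 * Example (assumed, illustration only): node_possible_map = {0, 2}
 * leaves num = 2 after the loop, so nr_node_ids becomes 3 and the
 * map is sized for three nodes even though node 1 is absent.
 */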
236
237void __cpuinit numa_set_node(int cpu, int node)
238{
239 int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map);
240
241 if (cpu_pda(cpu) && node != NUMA_NO_NODE)
242 cpu_pda(cpu)->nodenumber = node;
243
244 if (cpu_to_node_map)
245 cpu_to_node_map[cpu] = node;
246
247 else if (per_cpu_offset(cpu))
248 per_cpu(x86_cpu_to_node_map, cpu) = node;
249
250 else
251 Dprintk(KERN_INFO "Setting node for non-present cpu %d\n", cpu);
252}
253
254void __cpuinit numa_clear_node(int cpu)
255{
256 numa_set_node(cpu, NUMA_NO_NODE);
257}
258
259#ifndef CONFIG_DEBUG_PER_CPU_MAPS
260
261void __cpuinit numa_add_cpu(int cpu)
262{
263 cpu_set(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
264}
265
266void __cpuinit numa_remove_cpu(int cpu)
267{
268 cpu_clear(cpu, node_to_cpumask_map[cpu_to_node(cpu)]);
269}
270
271#else /* CONFIG_DEBUG_PER_CPU_MAPS */
272
273/*
274 * --------- debug versions of the numa functions ---------
275 */
276static void __cpuinit numa_set_cpumask(int cpu, int enable)
277{
278 int node = cpu_to_node(cpu);
279 cpumask_t *mask;
280 char buf[64];
281
282 if (node_to_cpumask_map == NULL) {
283 printk(KERN_ERR "node_to_cpumask_map NULL\n");
284 dump_stack();
285 return;
286 }
287
288 mask = &node_to_cpumask_map[node];
289 if (enable)
290 cpu_set(cpu, *mask);
291 else
292 cpu_clear(cpu, *mask);
293
294 cpulist_scnprintf(buf, sizeof(buf), *mask);
295 printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n",
296 enable? "numa_add_cpu":"numa_remove_cpu", cpu, node, buf);
297}
298
299void __cpuinit numa_add_cpu(int cpu)
300{
301 numa_set_cpumask(cpu, 1);
302}
303
304void __cpuinit numa_remove_cpu(int cpu)
305{
306 numa_set_cpumask(cpu, 0);
307}
308
309int cpu_to_node(int cpu)
310{
311 if (early_per_cpu_ptr(x86_cpu_to_node_map)) {
312 printk(KERN_WARNING
313 "cpu_to_node(%d): usage too early!\n", cpu);
314 dump_stack();
315 return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
316 }
317 return per_cpu(x86_cpu_to_node_map, cpu);
318}
319EXPORT_SYMBOL(cpu_to_node);
320
321/*
322 * Same function as cpu_to_node() but used if called before the
323 * per_cpu areas are set up.
324 */
325int early_cpu_to_node(int cpu)
326{
327 if (early_per_cpu_ptr(x86_cpu_to_node_map))
328 return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
329
330 if (!per_cpu_offset(cpu)) {
331 printk(KERN_WARNING
332 "early_cpu_to_node(%d): no per_cpu area!\n", cpu);
333 dump_stack();
334 return NUMA_NO_NODE;
335 }
336 return per_cpu(x86_cpu_to_node_map, cpu);
337}
338
339
340/* empty cpumask */
341static const cpumask_t cpu_mask_none;
342
343/*
344 * Returns a pointer to the bitmask of CPUs on Node 'node'.
345 */
346cpumask_t *_node_to_cpumask_ptr(int node)
347{
348 if (node_to_cpumask_map == NULL) {
349 printk(KERN_WARNING
350 "_node_to_cpumask_ptr(%d): no node_to_cpumask_map!\n",
351 node);
352 dump_stack();
353 return &cpu_online_map;
354 }
355 if (node >= nr_node_ids) {
356 printk(KERN_WARNING
357 "_node_to_cpumask_ptr(%d): node > nr_node_ids(%d)\n",
358 node, nr_node_ids);
359 dump_stack();
360 return (cpumask_t *)&cpu_mask_none;
361 }
362 return (cpumask_t *)&node_to_cpumask_map[node];
363}
364EXPORT_SYMBOL(_node_to_cpumask_ptr);
365
366/*
367 * Returns a bitmask of CPUs on Node 'node'.
368 *
369 * Side note: this function creates the returned cpumask on the stack
370 * so with a high NR_CPUS count, excessive stack space is used. The
371 * node_to_cpumask_ptr function should be used whenever possible.
372 */
373cpumask_t node_to_cpumask(int node)
374{
375 if (node_to_cpumask_map == NULL) {
376 printk(KERN_WARNING
377 "node_to_cpumask(%d): no node_to_cpumask_map!\n", node);
378 dump_stack();
379 return cpu_online_map;
380 }
381 if (node >= nr_node_ids) {
382 printk(KERN_WARNING
383 "node_to_cpumask(%d): node > nr_node_ids(%d)\n",
384 node, nr_node_ids);
385 dump_stack();
386 return cpu_mask_none;
387 }
388 return node_to_cpumask_map[node];
389}
390EXPORT_SYMBOL(node_to_cpumask);
391
392/*
393 * --------- end of debug versions of the numa functions ---------
394 */
395
396#endif /* CONFIG_DEBUG_PER_CPU_MAPS */
397
398#endif /* X86_64_NUMA */
399
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 3e1cecedde42..f35c2d8016ac 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -59,7 +59,6 @@
59#include <asm/pgtable.h> 59#include <asm/pgtable.h>
60#include <asm/tlbflush.h> 60#include <asm/tlbflush.h>
61#include <asm/mtrr.h> 61#include <asm/mtrr.h>
62#include <asm/nmi.h>
63#include <asm/vmi.h> 62#include <asm/vmi.h>
64#include <asm/genapic.h> 63#include <asm/genapic.h>
65#include <linux/mc146818rtc.h> 64#include <linux/mc146818rtc.h>
@@ -68,22 +67,6 @@
68#include <mach_wakecpu.h> 67#include <mach_wakecpu.h>
69#include <smpboot_hooks.h> 68#include <smpboot_hooks.h>
70 69
71/*
72 * FIXME: For x86_64, those are defined in other files. But moving them here,
73 * would make the setup areas dependent on smp, which is a loss. When we
74 * integrate apic between arches, we can probably do a better job, but
75 * right now, they'll stay here -- glommer
76 */
77
78/* which logical CPU number maps to which CPU (physical APIC ID) */
79u16 x86_cpu_to_apicid_init[NR_CPUS] __initdata =
80 { [0 ... NR_CPUS-1] = BAD_APICID };
81void *x86_cpu_to_apicid_early_ptr;
82
83u16 x86_bios_cpu_apicid_init[NR_CPUS] __initdata
84 = { [0 ... NR_CPUS-1] = BAD_APICID };
85void *x86_bios_cpu_apicid_early_ptr;
86
87#ifdef CONFIG_X86_32 70#ifdef CONFIG_X86_32
88u8 apicid_2_node[MAX_APICID]; 71u8 apicid_2_node[MAX_APICID];
89static int low_mappings; 72static int low_mappings;
@@ -198,13 +181,12 @@ static void map_cpu_to_logical_apicid(void)
198 map_cpu_to_node(cpu, node); 181 map_cpu_to_node(cpu, node);
199} 182}
200 183
201static void unmap_cpu_to_logical_apicid(int cpu) 184void numa_remove_cpu(int cpu)
202{ 185{
203 cpu_2_logical_apicid[cpu] = BAD_APICID; 186 cpu_2_logical_apicid[cpu] = BAD_APICID;
204 unmap_cpu_to_node(cpu); 187 unmap_cpu_to_node(cpu);
205} 188}
206#else 189#else
207#define unmap_cpu_to_logical_apicid(cpu) do {} while (0)
208#define map_cpu_to_logical_apicid() do {} while (0) 190#define map_cpu_to_logical_apicid() do {} while (0)
209#endif 191#endif
210 192
@@ -346,15 +328,8 @@ static void __cpuinit start_secondary(void *unused)
346 * smp_call_function(). 328 * smp_call_function().
347 */ 329 */
348 lock_ipi_call_lock(); 330 lock_ipi_call_lock();
349#ifdef CONFIG_X86_64 331#ifdef CONFIG_X86_IO_APIC
350 spin_lock(&vector_lock); 332 setup_vector_irq(smp_processor_id());
351
352 /* Setup the per cpu irq handling data structures */
353 __setup_vector_irq(smp_processor_id());
354 /*
355 * Allow the master to continue.
356 */
357 spin_unlock(&vector_lock);
358#endif 333#endif
359 cpu_set(smp_processor_id(), cpu_online_map); 334 cpu_set(smp_processor_id(), cpu_online_map);
360 unlock_ipi_call_lock(); 335 unlock_ipi_call_lock();
@@ -366,31 +341,8 @@ static void __cpuinit start_secondary(void *unused)
366 cpu_idle(); 341 cpu_idle();
367} 342}
368 343
369#ifdef CONFIG_X86_32
370/*
371 * Everything has been set up for the secondary
372 * CPUs - they just need to reload everything
373 * from the task structure
374 * This function must not return.
375 */
376void __devinit initialize_secondary(void)
377{
378 /*
379 * We don't actually need to load the full TSS,
380 * basically just the stack pointer and the ip.
381 */
382
383 asm volatile(
384 "movl %0,%%esp\n\t"
385 "jmp *%1"
386 :
387 :"m" (current->thread.sp), "m" (current->thread.ip));
388}
389#endif
390
391static void __cpuinit smp_apply_quirks(struct cpuinfo_x86 *c) 344static void __cpuinit smp_apply_quirks(struct cpuinfo_x86 *c)
392{ 345{
393#ifdef CONFIG_X86_32
394 /* 346 /*
395 * Mask B, Pentium, but not Pentium MMX 347 * Mask B, Pentium, but not Pentium MMX
396 */ 348 */
@@ -440,7 +392,6 @@ static void __cpuinit smp_apply_quirks(struct cpuinfo_x86 *c)
440 392
441valid_k7: 393valid_k7:
442 ; 394 ;
443#endif
444} 395}
445 396
446static void __cpuinit smp_checks(void) 397static void __cpuinit smp_checks(void)
@@ -555,23 +506,6 @@ cpumask_t cpu_coregroup_map(int cpu)
555 return c->llc_shared_map; 506 return c->llc_shared_map;
556} 507}
557 508
558#ifdef CONFIG_X86_32
559/*
560 * We are called very early to get the low memory for the
561 * SMP bootup trampoline page.
562 */
563void __init smp_alloc_memory(void)
564{
565 trampoline_base = alloc_bootmem_low_pages(PAGE_SIZE);
566 /*
567 * Has to be in very low memory so we can execute
568 * real-mode AP code.
569 */
570 if (__pa(trampoline_base) >= 0x9F000)
571 BUG();
572}
573#endif
574
575static void impress_friends(void) 509static void impress_friends(void)
576{ 510{
577 int cpu; 511 int cpu;
@@ -748,11 +682,7 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
748 * target processor state. 682 * target processor state.
749 */ 683 */
750 startup_ipi_hook(phys_apicid, (unsigned long) start_secondary, 684 startup_ipi_hook(phys_apicid, (unsigned long) start_secondary,
751#ifdef CONFIG_X86_64
752 (unsigned long)init_rsp);
753#else
754 (unsigned long)stack_start.sp); 685 (unsigned long)stack_start.sp);
755#endif
756 686
757 /* 687 /*
758 * Run STARTUP IPI loop. 688 * Run STARTUP IPI loop.
@@ -832,6 +762,45 @@ static void __cpuinit do_fork_idle(struct work_struct *work)
832 complete(&c_idle->done); 762 complete(&c_idle->done);
833} 763}
834 764
765#ifdef CONFIG_X86_64
766/*
767 * Allocate node local memory for the AP pda.
768 *
769 * Must be called after the _cpu_pda pointer table is initialized.
770 */
771static int __cpuinit get_local_pda(int cpu)
772{
773 struct x8664_pda *oldpda, *newpda;
774 unsigned long size = sizeof(struct x8664_pda);
775 int node = cpu_to_node(cpu);
776
777 if (cpu_pda(cpu) && !cpu_pda(cpu)->in_bootmem)
778 return 0;
779
780 oldpda = cpu_pda(cpu);
781 newpda = kmalloc_node(size, GFP_ATOMIC, node);
782 if (!newpda) {
783 printk(KERN_ERR "Could not allocate node local PDA "
784 "for CPU %d on node %d\n", cpu, node);
785
786 if (oldpda)
787 return 0; /* have a usable pda */
788 else
789 return -1;
790 }
791
792 if (oldpda) {
793 memcpy(newpda, oldpda, size);
794 if (!after_bootmem)
795 free_bootmem((unsigned long)oldpda, size);
796 }
797
798 newpda->in_bootmem = 0;
799 cpu_pda(cpu) = newpda;
800 return 0;
801}
802#endif /* CONFIG_X86_64 */
803
835static int __cpuinit do_boot_cpu(int apicid, int cpu) 804static int __cpuinit do_boot_cpu(int apicid, int cpu)
836/* 805/*
837 * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad 806 * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
@@ -848,28 +817,14 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu)
848 .done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done), 817 .done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done),
849 }; 818 };
850 INIT_WORK(&c_idle.work, do_fork_idle); 819 INIT_WORK(&c_idle.work, do_fork_idle);
851#ifdef CONFIG_X86_64
852 /* allocate memory for gdts of secondary cpus. Hotplug is considered */
853 if (!cpu_gdt_descr[cpu].address &&
854 !(cpu_gdt_descr[cpu].address = get_zeroed_page(GFP_KERNEL))) {
855 printk(KERN_ERR "Failed to allocate GDT for CPU %d\n", cpu);
856 return -1;
857 }
858 820
821#ifdef CONFIG_X86_64
859 /* Allocate node local memory for AP pdas */ 822 /* Allocate node local memory for AP pdas */
860 if (cpu_pda(cpu) == &boot_cpu_pda[cpu]) { 823 if (cpu > 0) {
861 struct x8664_pda *newpda, *pda; 824 boot_error = get_local_pda(cpu);
862 int node = cpu_to_node(cpu); 825 if (boot_error)
863 pda = cpu_pda(cpu); 826 goto restore_state;
864 newpda = kmalloc_node(sizeof(struct x8664_pda), GFP_ATOMIC, 827 /* if can't get pda memory, can't start cpu */
865 node);
866 if (newpda) {
867 memcpy(newpda, pda, sizeof(struct x8664_pda));
868 cpu_pda(cpu) = newpda;
869 } else
870 printk(KERN_ERR
871 "Could not allocate node local PDA for CPU %d on node %d\n",
872 cpu, node);
873 } 828 }
874#endif 829#endif
875 830
@@ -905,18 +860,15 @@ do_rest:
905#ifdef CONFIG_X86_32 860#ifdef CONFIG_X86_32
906 per_cpu(current_task, cpu) = c_idle.idle; 861 per_cpu(current_task, cpu) = c_idle.idle;
907 init_gdt(cpu); 862 init_gdt(cpu);
908 early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
909 c_idle.idle->thread.ip = (unsigned long) start_secondary;
910 /* Stack for startup_32 can be just as for start_secondary onwards */ 863 /* Stack for startup_32 can be just as for start_secondary onwards */
911 stack_start.sp = (void *) c_idle.idle->thread.sp;
912 irq_ctx_init(cpu); 864 irq_ctx_init(cpu);
913#else 865#else
914 cpu_pda(cpu)->pcurrent = c_idle.idle; 866 cpu_pda(cpu)->pcurrent = c_idle.idle;
915 init_rsp = c_idle.idle->thread.sp;
916 load_sp0(&per_cpu(init_tss, cpu), &c_idle.idle->thread);
917 initial_code = (unsigned long)start_secondary;
918 clear_tsk_thread_flag(c_idle.idle, TIF_FORK); 867 clear_tsk_thread_flag(c_idle.idle, TIF_FORK);
919#endif 868#endif
869 early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
870 initial_code = (unsigned long)start_secondary;
871 stack_start.sp = (void *) c_idle.idle->thread.sp;
920 872
921 /* start_ip had better be page-aligned! */ 873 /* start_ip had better be page-aligned! */
922 start_ip = setup_trampoline(); 874 start_ip = setup_trampoline();
@@ -987,13 +939,12 @@ do_rest:
987 inquire_remote_apic(apicid); 939 inquire_remote_apic(apicid);
988 } 940 }
989 } 941 }
990
991 if (boot_error) {
992 /* Try to put things back the way they were before ... */
993 unmap_cpu_to_logical_apicid(cpu);
994#ifdef CONFIG_X86_64 942#ifdef CONFIG_X86_64
995 clear_node_cpumask(cpu); /* was set by numa_add_cpu */ 943restore_state:
996#endif 944#endif
945 if (boot_error) {
946 /* Try to put things back the way they were before ... */
947 numa_remove_cpu(cpu); /* was set by numa_add_cpu */
997 cpu_clear(cpu, cpu_callout_map); /* was set by do_boot_cpu() */ 948 cpu_clear(cpu, cpu_callout_map); /* was set by do_boot_cpu() */
998 cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */ 949 cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */
999 cpu_clear(cpu, cpu_present_map); 950 cpu_clear(cpu, cpu_present_map);
@@ -1087,14 +1038,12 @@ static __init void disable_smp(void)
1087{ 1038{
1088 cpu_present_map = cpumask_of_cpu(0); 1039 cpu_present_map = cpumask_of_cpu(0);
1089 cpu_possible_map = cpumask_of_cpu(0); 1040 cpu_possible_map = cpumask_of_cpu(0);
1090#ifdef CONFIG_X86_32
1091 smpboot_clear_io_apic_irqs(); 1041 smpboot_clear_io_apic_irqs();
1092#endif 1042
1093 if (smp_found_config) 1043 if (smp_found_config)
1094 phys_cpu_present_map = 1044 physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
1095 physid_mask_of_physid(boot_cpu_physical_apicid);
1096 else 1045 else
1097 phys_cpu_present_map = physid_mask_of_physid(0); 1046 physid_set_mask_of_physid(0, &phys_cpu_present_map);
1098 map_cpu_to_logical_apicid(); 1047 map_cpu_to_logical_apicid();
1099 cpu_set(0, per_cpu(cpu_sibling_map, 0)); 1048 cpu_set(0, per_cpu(cpu_sibling_map, 0));
1100 cpu_set(0, per_cpu(cpu_core_map, 0)); 1049 cpu_set(0, per_cpu(cpu_core_map, 0));
@@ -1157,12 +1106,12 @@ static int __init smp_sanity_check(unsigned max_cpus)
1157 * If SMP should be disabled, then really disable it! 1106 * If SMP should be disabled, then really disable it!
1158 */ 1107 */
1159 if (!max_cpus) { 1108 if (!max_cpus) {
1160 printk(KERN_INFO "SMP mode deactivated," 1109 printk(KERN_INFO "SMP mode deactivated.\n");
1161 "forcing use of dummy APIC emulation.\n");
1162 smpboot_clear_io_apic(); 1110 smpboot_clear_io_apic();
1163#ifdef CONFIG_X86_32 1111
1112 localise_nmi_watchdog();
1113
1164 connect_bsp_APIC(); 1114 connect_bsp_APIC();
1165#endif
1166 setup_local_APIC(); 1115 setup_local_APIC();
1167 end_local_APIC_setup(); 1116 end_local_APIC_setup();
1168 return -1; 1117 return -1;
@@ -1190,7 +1139,6 @@ static void __init smp_cpu_index_default(void)
1190void __init native_smp_prepare_cpus(unsigned int max_cpus) 1139void __init native_smp_prepare_cpus(unsigned int max_cpus)
1191{ 1140{
1192 preempt_disable(); 1141 preempt_disable();
1193 nmi_watchdog_default();
1194 smp_cpu_index_default(); 1142 smp_cpu_index_default();
1195 current_cpu_data = boot_cpu_data; 1143 current_cpu_data = boot_cpu_data;
1196 cpu_callin_map = cpumask_of_cpu(0); 1144 cpu_callin_map = cpumask_of_cpu(0);
@@ -1217,9 +1165,8 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
1217 } 1165 }
1218 preempt_enable(); 1166 preempt_enable();
1219 1167
1220#ifdef CONFIG_X86_32
1221 connect_bsp_APIC(); 1168 connect_bsp_APIC();
1222#endif 1169
1223 /* 1170 /*
1224 * Switch from PIC to APIC mode. 1171 * Switch from PIC to APIC mode.
1225 */ 1172 */
@@ -1257,8 +1204,8 @@ void __init native_smp_prepare_boot_cpu(void)
1257 int me = smp_processor_id(); 1204 int me = smp_processor_id();
1258#ifdef CONFIG_X86_32 1205#ifdef CONFIG_X86_32
1259 init_gdt(me); 1206 init_gdt(me);
1260 switch_to_new_gdt();
1261#endif 1207#endif
1208 switch_to_new_gdt();
1262 /* already set me in cpu_online_map in boot_cpu_init() */ 1209 /* already set me in cpu_online_map in boot_cpu_init() */
1263 cpu_set(me, cpu_callout_map); 1210 cpu_set(me, cpu_callout_map);
1264 per_cpu(cpu_state, me) = CPU_ONLINE; 1211 per_cpu(cpu_state, me) = CPU_ONLINE;
@@ -1278,23 +1225,6 @@ void __init native_smp_cpus_done(unsigned int max_cpus)
1278 1225
1279#ifdef CONFIG_HOTPLUG_CPU 1226#ifdef CONFIG_HOTPLUG_CPU
1280 1227
1281# ifdef CONFIG_X86_32
1282void cpu_exit_clear(void)
1283{
1284 int cpu = raw_smp_processor_id();
1285
1286 idle_task_exit();
1287
1288 cpu_uninit();
1289 irq_ctx_exit(cpu);
1290
1291 cpu_clear(cpu, cpu_callout_map);
1292 cpu_clear(cpu, cpu_callin_map);
1293
1294 unmap_cpu_to_logical_apicid(cpu);
1295}
1296# endif /* CONFIG_X86_32 */
1297
1298static void remove_siblinginfo(int cpu) 1228static void remove_siblinginfo(int cpu)
1299{ 1229{
1300 int sibling; 1230 int sibling;
@@ -1348,12 +1278,20 @@ __init void prefill_possible_map(void)
1348 int i; 1278 int i;
1349 int possible; 1279 int possible;
1350 1280
1281 /* no processor from mptable or madt */
1282 if (!num_processors)
1283 num_processors = 1;
1284
1285#ifdef CONFIG_HOTPLUG_CPU
1351 if (additional_cpus == -1) { 1286 if (additional_cpus == -1) {
1352 if (disabled_cpus > 0) 1287 if (disabled_cpus > 0)
1353 additional_cpus = disabled_cpus; 1288 additional_cpus = disabled_cpus;
1354 else 1289 else
1355 additional_cpus = 0; 1290 additional_cpus = 0;
1356 } 1291 }
1292#else
1293 additional_cpus = 0;
1294#endif
1357 possible = num_processors + additional_cpus; 1295 possible = num_processors + additional_cpus;
1358 if (possible > NR_CPUS) 1296 if (possible > NR_CPUS)
1359 possible = NR_CPUS; 1297 possible = NR_CPUS;
@@ -1363,18 +1301,18 @@ __init void prefill_possible_map(void)
1363 1301
1364 for (i = 0; i < possible; i++) 1302 for (i = 0; i < possible; i++)
1365 cpu_set(i, cpu_possible_map); 1303 cpu_set(i, cpu_possible_map);
1304
1305 nr_cpu_ids = possible;
1366} 1306}
1367 1307
1368static void __ref remove_cpu_from_maps(int cpu) 1308static void __ref remove_cpu_from_maps(int cpu)
1369{ 1309{
1370 cpu_clear(cpu, cpu_online_map); 1310 cpu_clear(cpu, cpu_online_map);
1371#ifdef CONFIG_X86_64
1372 cpu_clear(cpu, cpu_callout_map); 1311 cpu_clear(cpu, cpu_callout_map);
1373 cpu_clear(cpu, cpu_callin_map); 1312 cpu_clear(cpu, cpu_callin_map);
1374 /* was set by cpu_init() */ 1313 /* was set by cpu_init() */
1375 clear_bit(cpu, (unsigned long *)&cpu_initialized); 1314 clear_bit(cpu, (unsigned long *)&cpu_initialized);
1376 clear_node_cpumask(cpu); 1315 numa_remove_cpu(cpu);
1377#endif
1378} 1316}
1379 1317
1380int __cpu_disable(void) 1318int __cpu_disable(void)
diff --git a/arch/x86/kernel/summit_32.c b/arch/x86/kernel/summit_32.c
index ae751094eba9..d67ce5f044ba 100644
--- a/arch/x86/kernel/summit_32.c
+++ b/arch/x86/kernel/summit_32.c
@@ -36,7 +36,9 @@ static struct rio_table_hdr *rio_table_hdr __initdata;
36static struct scal_detail *scal_devs[MAX_NUMNODES] __initdata; 36static struct scal_detail *scal_devs[MAX_NUMNODES] __initdata;
37static struct rio_detail *rio_devs[MAX_NUMNODES*4] __initdata; 37static struct rio_detail *rio_devs[MAX_NUMNODES*4] __initdata;
38 38
39#ifndef CONFIG_X86_NUMAQ
39static int mp_bus_id_to_node[MAX_MP_BUSSES] __initdata; 40static int mp_bus_id_to_node[MAX_MP_BUSSES] __initdata;
41#endif
40 42
41static int __init setup_pci_node_map_for_wpeg(int wpeg_num, int last_bus) 43static int __init setup_pci_node_map_for_wpeg(int wpeg_num, int last_bus)
42{ 44{
diff --git a/arch/x86/kernel/sys_i386_32.c b/arch/x86/kernel/sys_i386_32.c
index d2ab52cc1d6b..7066cb855a60 100644
--- a/arch/x86/kernel/sys_i386_32.c
+++ b/arch/x86/kernel/sys_i386_32.c
@@ -19,8 +19,8 @@
19#include <linux/utsname.h> 19#include <linux/utsname.h>
20#include <linux/ipc.h> 20#include <linux/ipc.h>
21 21
22#include <asm/uaccess.h> 22#include <linux/uaccess.h>
23#include <asm/unistd.h> 23#include <linux/unistd.h>
24 24
25asmlinkage long sys_mmap2(unsigned long addr, unsigned long len, 25asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
26 unsigned long prot, unsigned long flags, 26 unsigned long prot, unsigned long flags,
@@ -103,7 +103,7 @@ asmlinkage int old_select(struct sel_arg_struct __user *arg)
103 * 103 *
104 * This is really horribly ugly. 104 * This is really horribly ugly.
105 */ 105 */
106asmlinkage int sys_ipc (uint call, int first, int second, 106asmlinkage int sys_ipc(uint call, int first, int second,
107 int third, void __user *ptr, long fifth) 107 int third, void __user *ptr, long fifth)
108{ 108{
109 int version, ret; 109 int version, ret;
@@ -113,24 +113,24 @@ asmlinkage int sys_ipc (uint call, int first, int second,
113 113
114 switch (call) { 114 switch (call) {
115 case SEMOP: 115 case SEMOP:
116 return sys_semtimedop (first, (struct sembuf __user *)ptr, second, NULL); 116 return sys_semtimedop(first, (struct sembuf __user *)ptr, second, NULL);
117 case SEMTIMEDOP: 117 case SEMTIMEDOP:
118 return sys_semtimedop(first, (struct sembuf __user *)ptr, second, 118 return sys_semtimedop(first, (struct sembuf __user *)ptr, second,
119 (const struct timespec __user *)fifth); 119 (const struct timespec __user *)fifth);
120 120
121 case SEMGET: 121 case SEMGET:
122 return sys_semget (first, second, third); 122 return sys_semget(first, second, third);
123 case SEMCTL: { 123 case SEMCTL: {
124 union semun fourth; 124 union semun fourth;
125 if (!ptr) 125 if (!ptr)
126 return -EINVAL; 126 return -EINVAL;
127 if (get_user(fourth.__pad, (void __user * __user *) ptr)) 127 if (get_user(fourth.__pad, (void __user * __user *) ptr))
128 return -EFAULT; 128 return -EFAULT;
129 return sys_semctl (first, second, third, fourth); 129 return sys_semctl(first, second, third, fourth);
130 } 130 }
131 131
132 case MSGSND: 132 case MSGSND:
133 return sys_msgsnd (first, (struct msgbuf __user *) ptr, 133 return sys_msgsnd(first, (struct msgbuf __user *) ptr,
134 second, third); 134 second, third);
135 case MSGRCV: 135 case MSGRCV:
136 switch (version) { 136 switch (version) {
@@ -138,45 +138,45 @@ asmlinkage int sys_ipc (uint call, int first, int second,
138 struct ipc_kludge tmp; 138 struct ipc_kludge tmp;
139 if (!ptr) 139 if (!ptr)
140 return -EINVAL; 140 return -EINVAL;
141 141
142 if (copy_from_user(&tmp, 142 if (copy_from_user(&tmp,
143 (struct ipc_kludge __user *) ptr, 143 (struct ipc_kludge __user *) ptr,
144 sizeof (tmp))) 144 sizeof(tmp)))
145 return -EFAULT; 145 return -EFAULT;
146 return sys_msgrcv (first, tmp.msgp, second, 146 return sys_msgrcv(first, tmp.msgp, second,
147 tmp.msgtyp, third); 147 tmp.msgtyp, third);
148 } 148 }
149 default: 149 default:
150 return sys_msgrcv (first, 150 return sys_msgrcv(first,
151 (struct msgbuf __user *) ptr, 151 (struct msgbuf __user *) ptr,
152 second, fifth, third); 152 second, fifth, third);
153 } 153 }
154 case MSGGET: 154 case MSGGET:
155 return sys_msgget ((key_t) first, second); 155 return sys_msgget((key_t) first, second);
156 case MSGCTL: 156 case MSGCTL:
157 return sys_msgctl (first, second, (struct msqid_ds __user *) ptr); 157 return sys_msgctl(first, second, (struct msqid_ds __user *) ptr);
158 158
159 case SHMAT: 159 case SHMAT:
160 switch (version) { 160 switch (version) {
161 default: { 161 default: {
162 ulong raddr; 162 ulong raddr;
163 ret = do_shmat (first, (char __user *) ptr, second, &raddr); 163 ret = do_shmat(first, (char __user *) ptr, second, &raddr);
164 if (ret) 164 if (ret)
165 return ret; 165 return ret;
166 return put_user (raddr, (ulong __user *) third); 166 return put_user(raddr, (ulong __user *) third);
167 } 167 }
168 case 1: /* iBCS2 emulator entry point */ 168 case 1: /* iBCS2 emulator entry point */
169 if (!segment_eq(get_fs(), get_ds())) 169 if (!segment_eq(get_fs(), get_ds()))
170 return -EINVAL; 170 return -EINVAL;
171 /* The "(ulong *) third" is valid _only_ because of the kernel segment thing */ 171 /* The "(ulong *) third" is valid _only_ because of the kernel segment thing */
172 return do_shmat (first, (char __user *) ptr, second, (ulong *) third); 172 return do_shmat(first, (char __user *) ptr, second, (ulong *) third);
173 } 173 }
174 case SHMDT: 174 case SHMDT:
175 return sys_shmdt ((char __user *)ptr); 175 return sys_shmdt((char __user *)ptr);
176 case SHMGET: 176 case SHMGET:
177 return sys_shmget (first, second, third); 177 return sys_shmget(first, second, third);
178 case SHMCTL: 178 case SHMCTL:
179 return sys_shmctl (first, second, 179 return sys_shmctl(first, second,
180 (struct shmid_ds __user *) ptr); 180 (struct shmid_ds __user *) ptr);
181 default: 181 default:
182 return -ENOSYS; 182 return -ENOSYS;
@@ -186,28 +186,28 @@ asmlinkage int sys_ipc (uint call, int first, int second,
186/* 186/*
187 * Old cruft 187 * Old cruft
188 */ 188 */
189asmlinkage int sys_uname(struct old_utsname __user * name) 189asmlinkage int sys_uname(struct old_utsname __user *name)
190{ 190{
191 int err; 191 int err;
192 if (!name) 192 if (!name)
193 return -EFAULT; 193 return -EFAULT;
194 down_read(&uts_sem); 194 down_read(&uts_sem);
195 err = copy_to_user(name, utsname(), sizeof (*name)); 195 err = copy_to_user(name, utsname(), sizeof(*name));
196 up_read(&uts_sem); 196 up_read(&uts_sem);
197 return err?-EFAULT:0; 197 return err? -EFAULT:0;
198} 198}
199 199
200asmlinkage int sys_olduname(struct oldold_utsname __user * name) 200asmlinkage int sys_olduname(struct oldold_utsname __user *name)
201{ 201{
202 int error; 202 int error;
203 203
204 if (!name) 204 if (!name)
205 return -EFAULT; 205 return -EFAULT;
206 if (!access_ok(VERIFY_WRITE,name,sizeof(struct oldold_utsname))) 206 if (!access_ok(VERIFY_WRITE, name, sizeof(struct oldold_utsname)))
207 return -EFAULT; 207 return -EFAULT;
208 208
209 down_read(&uts_sem); 209 down_read(&uts_sem);
210 210
211 error = __copy_to_user(&name->sysname, &utsname()->sysname, 211 error = __copy_to_user(&name->sysname, &utsname()->sysname,
212 __OLD_UTS_LEN); 212 __OLD_UTS_LEN);
213 error |= __put_user(0, name->sysname + __OLD_UTS_LEN); 213 error |= __put_user(0, name->sysname + __OLD_UTS_LEN);
@@ -223,9 +223,9 @@ asmlinkage int sys_olduname(struct oldold_utsname __user * name)
223 error |= __copy_to_user(&name->machine, &utsname()->machine, 223 error |= __copy_to_user(&name->machine, &utsname()->machine,
224 __OLD_UTS_LEN); 224 __OLD_UTS_LEN);
225 error |= __put_user(0, name->machine + __OLD_UTS_LEN); 225 error |= __put_user(0, name->machine + __OLD_UTS_LEN);
226 226
227 up_read(&uts_sem); 227 up_read(&uts_sem);
228 228
229 error = error ? -EFAULT : 0; 229 error = error ? -EFAULT : 0;
230 230
231 return error; 231 return error;
@@ -241,6 +241,6 @@ int kernel_execve(const char *filename, char *const argv[], char *const envp[])
241 long __res; 241 long __res;
242 asm volatile ("push %%ebx ; movl %2,%%ebx ; int $0x80 ; pop %%ebx" 242 asm volatile ("push %%ebx ; movl %2,%%ebx ; int $0x80 ; pop %%ebx"
243 : "=a" (__res) 243 : "=a" (__res)
244 : "0" (__NR_execve),"ri" (filename),"c" (argv), "d" (envp) : "memory"); 244 : "0" (__NR_execve), "ri" (filename), "c" (argv), "d" (envp) : "memory");
245 return __res; 245 return __res;
246} 246}
diff --git a/arch/x86/kernel/time_32.c b/arch/x86/kernel/time_32.c
index 2ff21f398934..059ca6ee59b4 100644
--- a/arch/x86/kernel/time_32.c
+++ b/arch/x86/kernel/time_32.c
@@ -39,9 +39,6 @@
39 39
40#include "do_timer.h" 40#include "do_timer.h"
41 41
42unsigned int cpu_khz; /* Detected as we calibrate the TSC */
43EXPORT_SYMBOL(cpu_khz);
44
45int timer_ack; 42int timer_ack;
46 43
47unsigned long profile_pc(struct pt_regs *regs) 44unsigned long profile_pc(struct pt_regs *regs)
@@ -84,8 +81,7 @@ irqreturn_t timer_interrupt(int irq, void *dev_id)
84 if (timer_ack) { 81 if (timer_ack) {
85 /* 82 /*
86 * Subtle, when I/O APICs are used we have to ack timer IRQ 83 * Subtle, when I/O APICs are used we have to ack timer IRQ
87 * manually to reset the IRR bit for do_slow_gettimeoffset(). 84 * manually to deassert NMI lines for the watchdog if run
88 * This will also deassert NMI lines for the watchdog if run
89 * on an 82489DX-based system. 85 * on an 82489DX-based system.
90 */ 86 */
91 spin_lock(&i8259A_lock); 87 spin_lock(&i8259A_lock);
diff --git a/arch/x86/kernel/time_64.c b/arch/x86/kernel/time_64.c
index c737849e2ef7..e3d49c553af2 100644
--- a/arch/x86/kernel/time_64.c
+++ b/arch/x86/kernel/time_64.c
@@ -56,7 +56,7 @@ static irqreturn_t timer_event_interrupt(int irq, void *dev_id)
56/* calibrate_cpu is used on systems with fixed rate TSCs to determine 56/* calibrate_cpu is used on systems with fixed rate TSCs to determine
57 * processor frequency */ 57 * processor frequency */
58#define TICK_COUNT 100000000 58#define TICK_COUNT 100000000
59unsigned long __init native_calculate_cpu_khz(void) 59unsigned long __init calibrate_cpu(void)
60{ 60{
61 int tsc_start, tsc_now; 61 int tsc_start, tsc_now;
62 int i, no_ctr_free; 62 int i, no_ctr_free;
@@ -116,23 +116,11 @@ void __init hpet_time_init(void)
116 116
117void __init time_init(void) 117void __init time_init(void)
118{ 118{
119 tsc_calibrate(); 119 tsc_init();
120
121 cpu_khz = tsc_khz;
122 if (cpu_has(&boot_cpu_data, X86_FEATURE_CONSTANT_TSC) &&
123 (boot_cpu_data.x86_vendor == X86_VENDOR_AMD))
124 cpu_khz = calculate_cpu_khz();
125
126 if (unsynchronized_tsc())
127 mark_tsc_unstable("TSCs unsynchronized");
128
129 if (cpu_has(&boot_cpu_data, X86_FEATURE_RDTSCP)) 120 if (cpu_has(&boot_cpu_data, X86_FEATURE_RDTSCP))
130 vgetcpu_mode = VGETCPU_RDTSCP; 121 vgetcpu_mode = VGETCPU_RDTSCP;
131 else 122 else
132 vgetcpu_mode = VGETCPU_LSL; 123 vgetcpu_mode = VGETCPU_LSL;
133 124
134 printk(KERN_INFO "time.c: Detected %d.%03d MHz processor.\n",
135 cpu_khz / 1000, cpu_khz % 1000);
136 init_tsc_clocksource();
137 late_time_init = choose_time_init(); 125 late_time_init = choose_time_init();
138} 126}
diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c
index a1f07d793202..5039d0f097a2 100644
--- a/arch/x86/kernel/tlb_64.c
+++ b/arch/x86/kernel/tlb_64.c
@@ -15,6 +15,8 @@
15#include <asm/proto.h> 15#include <asm/proto.h>
16#include <asm/apicdef.h> 16#include <asm/apicdef.h>
17#include <asm/idle.h> 17#include <asm/idle.h>
18#include <asm/uv/uv_hub.h>
19#include <asm/uv/uv_bau.h>
18 20
19#include <mach_ipi.h> 21#include <mach_ipi.h>
20/* 22/*
@@ -162,6 +164,9 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
162 union smp_flush_state *f; 164 union smp_flush_state *f;
163 cpumask_t cpumask = *cpumaskp; 165 cpumask_t cpumask = *cpumaskp;
164 166
167 if (is_uv_system() && uv_flush_tlb_others(&cpumask, mm, va))
168 return;
169
165 /* Caller has disabled preemption */ 170 /* Caller has disabled preemption */
166 sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS; 171 sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS;
167 f = &per_cpu(flush_state, sender); 172 f = &per_cpu(flush_state, sender);
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c
new file mode 100644
index 000000000000..d0fbb7712ab0
--- /dev/null
+++ b/arch/x86/kernel/tlb_uv.c
@@ -0,0 +1,792 @@
1/*
2 * SGI UltraViolet TLB flush routines.
3 *
4 * (c) 2008 Cliff Wickman <cpw@sgi.com>, SGI.
5 *
6 * This code is released under the GNU General Public License version 2 or
7 * later.
8 */
9#include <linux/mc146818rtc.h>
10#include <linux/proc_fs.h>
11#include <linux/kernel.h>
12
13#include <asm/mmu_context.h>
14#include <asm/uv/uv_mmrs.h>
15#include <asm/uv/uv_hub.h>
16#include <asm/uv/uv_bau.h>
17#include <asm/genapic.h>
18#include <asm/idle.h>
19#include <asm/tsc.h>
20
21#include <mach_apic.h>
22
23static struct bau_control **uv_bau_table_bases __read_mostly;
24static int uv_bau_retry_limit __read_mostly;
25
26/* position of pnode (which is nasid>>1): */
27static int uv_nshift __read_mostly;
28
29static unsigned long uv_mmask __read_mostly;
30
31static DEFINE_PER_CPU(struct ptc_stats, ptcstats);
32static DEFINE_PER_CPU(struct bau_control, bau_control);
33
34/*
35 * Free a software acknowledge hardware resource by clearing its Pending
36 * bit. This will return a reply to the sender.
37 * If the message has timed out, a reply has already been sent by the
 38 * hardware but the resource has not been released. In that case,
 39 * clearing the Timeout bit as well will free the resource. No reply will
40 * be sent (the hardware will only do one reply per message).
41 */
42static void uv_reply_to_message(int resource,
43 struct bau_payload_queue_entry *msg,
44 struct bau_msg_status *msp)
45{
46 unsigned long dw;
47
48 dw = (1 << (resource + UV_SW_ACK_NPENDING)) | (1 << resource);
49 msg->replied_to = 1;
50 msg->sw_ack_vector = 0;
51 if (msp)
52 msp->seen_by.bits = 0;
53 uv_write_local_mmr(UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, dw);
54}
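
The MMR write above clears both the pending bit and its timeout twin; with assumed values for illustration:

/*
 * Example (assumed values): resource = 2, UV_SW_ACK_NPENDING = 8:
 *   dw = (1 << 10) | (1 << 2) = 0x404
 * Bit 2 clears the pending resource and bit 10 clears a stale
 * timeout for the same slot, so a reply goes out in either case.
 */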
55
56/*
57 * Do all the things a cpu should do for a TLB shootdown message.
 58 * Other cpus may come here at the same time for this message.
59 */
60static void uv_bau_process_message(struct bau_payload_queue_entry *msg,
61 int msg_slot, int sw_ack_slot)
62{
63 unsigned long this_cpu_mask;
64 struct bau_msg_status *msp;
65 int cpu;
66
67 msp = __get_cpu_var(bau_control).msg_statuses + msg_slot;
68 cpu = uv_blade_processor_id();
69 msg->number_of_cpus =
70 uv_blade_nr_online_cpus(uv_node_to_blade_id(numa_node_id()));
71 this_cpu_mask = 1UL << cpu;
72 if (msp->seen_by.bits & this_cpu_mask)
73 return;
74 atomic_or_long(&msp->seen_by.bits, this_cpu_mask);
75
76 if (msg->replied_to == 1)
77 return;
78
79 if (msg->address == TLB_FLUSH_ALL) {
80 local_flush_tlb();
81 __get_cpu_var(ptcstats).alltlb++;
82 } else {
83 __flush_tlb_one(msg->address);
84 __get_cpu_var(ptcstats).onetlb++;
85 }
86
87 __get_cpu_var(ptcstats).requestee++;
88
89 atomic_inc_short(&msg->acknowledge_count);
90 if (msg->number_of_cpus == msg->acknowledge_count)
91 uv_reply_to_message(sw_ack_slot, msg, msp);
92}
93
94/*
95 * Examine the payload queue on one distribution node to see
96 * which messages have not been seen, and which cpu(s) have not seen them.
97 *
 98 * Returns the number of cpus that have not responded.
99 */
100static int uv_examine_destination(struct bau_control *bau_tablesp, int sender)
101{
102 struct bau_payload_queue_entry *msg;
103 struct bau_msg_status *msp;
104 int count = 0;
105 int i;
106 int j;
107
108 for (msg = bau_tablesp->va_queue_first, i = 0; i < DEST_Q_SIZE;
109 msg++, i++) {
110 if ((msg->sending_cpu == sender) && (!msg->replied_to)) {
111 msp = bau_tablesp->msg_statuses + i;
112 printk(KERN_DEBUG
113 "blade %d: address:%#lx %d of %d, not cpu(s): ",
114 i, msg->address, msg->acknowledge_count,
115 msg->number_of_cpus);
116 for (j = 0; j < msg->number_of_cpus; j++) {
117 if (!((1L << j) & msp->seen_by.bits)) {
118 count++;
119 printk("%d ", j);
120 }
121 }
122 printk("\n");
123 }
124 }
125 return count;
126}
127
128/*
129 * Examine the payload queue on all the distribution nodes to see
130 * which messages have not been seen, and which cpu(s) have not seen them.
131 *
132 * Returns the number of cpus that have not responded.
133 */
134static int uv_examine_destinations(struct bau_target_nodemask *distribution)
135{
136 int sender;
137 int i;
138 int count = 0;
139
140 sender = smp_processor_id();
141 for (i = 0; i < sizeof(struct bau_target_nodemask) * BITSPERBYTE; i++) {
142 if (!bau_node_isset(i, distribution))
143 continue;
144 count += uv_examine_destination(uv_bau_table_bases[i], sender);
145 }
146 return count;
147}
148
149/*
150 * wait for completion of a broadcast message
151 *
152 * return COMPLETE, RETRY or GIVEUP
153 */
154static int uv_wait_completion(struct bau_desc *bau_desc,
155 unsigned long mmr_offset, int right_shift)
156{
157 int exams = 0;
158 long destination_timeouts = 0;
159 long source_timeouts = 0;
160 unsigned long descriptor_status;
161
162 while ((descriptor_status = (((unsigned long)
163 uv_read_local_mmr(mmr_offset) >>
164 right_shift) & UV_ACT_STATUS_MASK)) !=
165 DESC_STATUS_IDLE) {
166 if (descriptor_status == DESC_STATUS_SOURCE_TIMEOUT) {
167 source_timeouts++;
168 if (source_timeouts > SOURCE_TIMEOUT_LIMIT)
169 source_timeouts = 0;
170 __get_cpu_var(ptcstats).s_retry++;
171 return FLUSH_RETRY;
172 }
173 /*
174 * spin here looking for progress at the destinations
175 */
176 if (descriptor_status == DESC_STATUS_DESTINATION_TIMEOUT) {
177 destination_timeouts++;
178 if (destination_timeouts > DESTINATION_TIMEOUT_LIMIT) {
179 /*
180 * returns number of cpus not responding
181 */
182 if (uv_examine_destinations
183 (&bau_desc->distribution) == 0) {
184 __get_cpu_var(ptcstats).d_retry++;
185 return FLUSH_RETRY;
186 }
187 exams++;
188 if (exams >= uv_bau_retry_limit) {
189 printk(KERN_DEBUG
190					"uv_flush_tlb_others ");
191 printk("giving up on cpu %d\n",
192 smp_processor_id());
193 return FLUSH_GIVEUP;
194 }
195 /*
196 * delays can hang the simulator
197 udelay(1000);
198 */
199 destination_timeouts = 0;
200 }
201 }
202 }
203 return FLUSH_COMPLETE;
204}
205
206/**
207 * uv_flush_send_and_wait
208 *
209 * Send a broadcast message and wait for it to complete.
210 *
211 * The cpumaskp mask contains the cpus the broadcast was sent to.
212 *
213 * Returns 1 if all remote flushing was done. The mask is zeroed.
214 * Returns 0 if some remote flushing remains to be done. The mask is left
215 * unchanged.
216 */
217int uv_flush_send_and_wait(int cpu, int this_blade, struct bau_desc *bau_desc,
218 cpumask_t *cpumaskp)
219{
220 int completion_status = 0;
221 int right_shift;
222 int tries = 0;
223 int blade;
224 int bit;
225 unsigned long mmr_offset;
226 unsigned long index;
227 cycles_t time1;
228 cycles_t time2;
229
230 if (cpu < UV_CPUS_PER_ACT_STATUS) {
231 mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_0;
232 right_shift = cpu * UV_ACT_STATUS_SIZE;
233 } else {
234 mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_1;
235 right_shift =
236 ((cpu - UV_CPUS_PER_ACT_STATUS) * UV_ACT_STATUS_SIZE);
237 }
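	/*
	 * Each cpu owns a UV_ACT_STATUS_SIZE-bit field; the first
	 * UV_CPUS_PER_ACT_STATUS cpus share STATUS_0 and the rest
	 * share STATUS_1, so right_shift locates this cpu's field.
	 */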
238 time1 = get_cycles();
239 do {
240 tries++;
241 index = (1UL << UVH_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_SHFT) |
242 cpu;
243 uv_write_local_mmr(UVH_LB_BAU_SB_ACTIVATION_CONTROL, index);
244 completion_status = uv_wait_completion(bau_desc, mmr_offset,
245 right_shift);
246 } while (completion_status == FLUSH_RETRY);
247 time2 = get_cycles();
248 __get_cpu_var(ptcstats).sflush += (time2 - time1);
249 if (tries > 1)
250 __get_cpu_var(ptcstats).retriesok++;
251
252 if (completion_status == FLUSH_GIVEUP) {
253 /*
254 * Cause the caller to do an IPI-style TLB shootdown on
 255 * the cpus, all of which are still in the mask.
256 */
257 __get_cpu_var(ptcstats).ptc_i++;
258 return 0;
259 }
260
261 /*
 262 * Success, so clear the remote cpus from the mask so we don't
263 * use the IPI method of shootdown on them.
264 */
265 for_each_cpu_mask(bit, *cpumaskp) {
266 blade = uv_cpu_to_blade_id(bit);
267 if (blade == this_blade)
268 continue;
269 cpu_clear(bit, *cpumaskp);
270 }
271 if (!cpus_empty(*cpumaskp))
272 return 0;
273 return 1;
274}
275
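The first step above picks which of the two activation-status MMRs holds this cpu's descriptor status, and the bit offset of its field within that register. The selection logic in isolation (the constants here are placeholders, not the real UVH_* values):

#define CPUS_PER_STATUS 8     /* placeholder for UV_CPUS_PER_ACT_STATUS */
#define STATUS_BITS     3     /* placeholder for UV_ACT_STATUS_SIZE */
#define STATUS_MMR_0    0x100 /* placeholder register addresses */
#define STATUS_MMR_1    0x108

struct status_loc {
	unsigned long mmr;    /* which status register to read */
	int shift;            /* right shift to this cpu's field */
};

struct status_loc locate_status(int cpu)
{
	struct status_loc loc;

	if (cpu < CPUS_PER_STATUS) {
		loc.mmr = STATUS_MMR_0;
		loc.shift = cpu * STATUS_BITS;
	} else {
		loc.mmr = STATUS_MMR_1;
		loc.shift = (cpu - CPUS_PER_STATUS) * STATUS_BITS;
	}
	return loc;
}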
276/**
277 * uv_flush_tlb_others - globally purge translation cache of a virtual
 278 * address or all TLBs
 279 * @cpumaskp: mask of all cpus from which the address is to be flushed
 280 * @mm: mm_struct containing virtual address range
 281 * @va: virtual address to be removed (or TLB_FLUSH_ALL for all TLBs on cpu)
 282 *
 283 * This is the entry point for initiating any UV global TLB shootdown.
 284 *
 285 * Purges the translation caches of all specified processors of the given
 286 * virtual address, or purges all TLBs on specified processors.
287 *
288 * The caller has derived the cpumaskp from the mm_struct and has subtracted
289 * the local cpu from the mask. This function is called only if there
 290 * are bits set in the mask (see, e.g., flush_tlb_page()).
291 *
292 * The cpumaskp is converted into a nodemask of the nodes containing
293 * the cpus.
294 *
295 * Returns 1 if all remote flushing was done.
296 * Returns 0 if some remote flushing remains to be done.
297 */
298int uv_flush_tlb_others(cpumask_t *cpumaskp, struct mm_struct *mm,
299 unsigned long va)
300{
301 int i;
302 int bit;
303 int blade;
304 int cpu;
305 int this_blade;
306 int locals = 0;
307 struct bau_desc *bau_desc;
308
309 cpu = uv_blade_processor_id();
310 this_blade = uv_numa_blade_id();
311 bau_desc = __get_cpu_var(bau_control).descriptor_base;
312 bau_desc += UV_ITEMS_PER_DESCRIPTOR * cpu;
313
314 bau_nodes_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE);
315
316 i = 0;
317 for_each_cpu_mask(bit, *cpumaskp) {
318 blade = uv_cpu_to_blade_id(bit);
319 BUG_ON(blade > (UV_DISTRIBUTION_SIZE - 1));
320 if (blade == this_blade) {
321 locals++;
322 continue;
323 }
324 bau_node_set(blade, &bau_desc->distribution);
325 i++;
326 }
327 if (i == 0) {
328 /*
 329 * no off-node flushing; return status for the local node
330 */
331 if (locals)
332 return 0;
333 else
334 return 1;
335 }
336 __get_cpu_var(ptcstats).requestor++;
337 __get_cpu_var(ptcstats).ntargeted += i;
338
339 bau_desc->payload.address = va;
340 bau_desc->payload.sending_cpu = smp_processor_id();
341
342 return uv_flush_send_and_wait(cpu, this_blade, bau_desc, cpumaskp);
343}
344
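The conversion from cpumask to node distribution above is the heart of the function: every remote cpu sets its blade's bit in the descriptor, while same-blade cpus are only counted. A user-space sketch of that conversion, assuming a hypothetical cpu_to_blade[] mapping table and a dist[] array already zeroed by the caller (as bau_nodes_clear() does above):

#define NCPUS   64
#define NBLADES 16

extern const int cpu_to_blade[NCPUS];       /* assumed mapping */

/*
 * Sets one bit per remote blade in dist[]; returns the number of remote
 * cpus targeted and counts same-blade cpus in *locals.
 */
int build_distribution(unsigned long cpumask, int this_blade,
		       unsigned char dist[NBLADES / 8], int *locals)
{
	int cpu;
	int remote = 0;

	*locals = 0;
	for (cpu = 0; cpu < NCPUS; cpu++) {
		if (!(cpumask & (1UL << cpu)))
			continue;
		if (cpu_to_blade[cpu] == this_blade) {
			(*locals)++;        /* flushed by the caller */
			continue;
		}
		dist[cpu_to_blade[cpu] / 8] |= 1 << (cpu_to_blade[cpu] % 8);
		remote++;
	}
	return remote;
}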
345/*
346 * The BAU message interrupt comes here. (registered by set_intr_gate)
347 * See entry_64.S
348 *
349 * We received a broadcast assist message.
350 *
351 * Interrupts may have been disabled; this interrupt could represent
352 * the receipt of several messages.
353 *
354 * All cores/threads on this node get this interrupt.
355 * The last one to see it does the s/w ack.
 356 * (the resource will not be freed until non-interruptible cpus see this
 357 * interrupt; hardware will time out the s/w ack and reply ERROR)
358 */
359void uv_bau_message_interrupt(struct pt_regs *regs)
360{
361 struct bau_payload_queue_entry *va_queue_first;
362 struct bau_payload_queue_entry *va_queue_last;
363 struct bau_payload_queue_entry *msg;
364 struct pt_regs *old_regs = set_irq_regs(regs);
365 cycles_t time1;
366 cycles_t time2;
367 int msg_slot;
368 int sw_ack_slot;
369 int fw;
370 int count = 0;
371 unsigned long local_pnode;
372
373 ack_APIC_irq();
374 exit_idle();
375 irq_enter();
376
377 time1 = get_cycles();
378
379 local_pnode = uv_blade_to_pnode(uv_numa_blade_id());
380
381 va_queue_first = __get_cpu_var(bau_control).va_queue_first;
382 va_queue_last = __get_cpu_var(bau_control).va_queue_last;
383
384 msg = __get_cpu_var(bau_control).bau_msg_head;
385 while (msg->sw_ack_vector) {
386 count++;
387 fw = msg->sw_ack_vector;
388 msg_slot = msg - va_queue_first;
389 sw_ack_slot = ffs(fw) - 1;
390
391 uv_bau_process_message(msg, msg_slot, sw_ack_slot);
392
393 msg++;
394 if (msg > va_queue_last)
395 msg = va_queue_first;
396 __get_cpu_var(bau_control).bau_msg_head = msg;
397 }
398 if (!count)
399 __get_cpu_var(ptcstats).nomsg++;
400 else if (count > 1)
401 __get_cpu_var(ptcstats).multmsg++;
402
403 time2 = get_cycles();
404 __get_cpu_var(ptcstats).dflush += (time2 - time1);
405
406 irq_exit();
407 set_irq_regs(old_regs);
408}
409
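The handler's main loop is a ring-buffer drain: consume entries until one with a clear sw_ack_vector is reached, wrapping from the last slot back to the first. The same walk in isolation (a sketch; process() stands in for uv_bau_process_message()):

struct entry {
	unsigned char sw_ack_vector;        /* 0 means: nothing to consume */
	/* ... payload fields elided ... */
};

extern void process(struct entry *e, int slot, int ack_slot);  /* assumed */

struct entry *drain_ring(struct entry *first, struct entry *last,
			 struct entry *head)
{
	struct entry *msg = head;

	while (msg->sw_ack_vector) {
		int slot = msg - first;                   /* ring index */
		int ack_slot = __builtin_ffs(msg->sw_ack_vector) - 1;

		process(msg, slot, ack_slot);
		if (++msg > last)                         /* wrap around */
			msg = first;
	}
	return msg;                         /* becomes the new head */
}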
410static void uv_enable_timeouts(void)
411{
412 int i;
413 int blade;
414 int last_blade;
415 int pnode;
416 int cur_cpu = 0;
417 unsigned long apicid;
418
419 last_blade = -1;
420 for_each_online_node(i) {
421 blade = uv_node_to_blade_id(i);
422 if (blade == last_blade)
423 continue;
424 last_blade = blade;
425 apicid = per_cpu(x86_cpu_to_apicid, cur_cpu);
426 pnode = uv_blade_to_pnode(blade);
427 cur_cpu += uv_blade_nr_possible_cpus(i);
428 }
429}
430
431static void *uv_ptc_seq_start(struct seq_file *file, loff_t *offset)
432{
433 if (*offset < num_possible_cpus())
434 return offset;
435 return NULL;
436}
437
438static void *uv_ptc_seq_next(struct seq_file *file, void *data, loff_t *offset)
439{
440 (*offset)++;
441 if (*offset < num_possible_cpus())
442 return offset;
443 return NULL;
444}
445
446static void uv_ptc_seq_stop(struct seq_file *file, void *data)
447{
448}
449
450/*
 451 * Display the statistics through /proc
452 * data points to the cpu number
453 */
454static int uv_ptc_seq_show(struct seq_file *file, void *data)
455{
456 struct ptc_stats *stat;
457 int cpu;
458
459 cpu = *(loff_t *)data;
460
461 if (!cpu) {
462 seq_printf(file,
463 "# cpu requestor requestee one all sretry dretry ptc_i ");
464 seq_printf(file,
465 "sw_ack sflush dflush sok dnomsg dmult starget\n");
466 }
467 if (cpu < num_possible_cpus() && cpu_online(cpu)) {
468 stat = &per_cpu(ptcstats, cpu);
469 seq_printf(file, "cpu %d %ld %ld %ld %ld %ld %ld %ld ",
470 cpu, stat->requestor,
471 stat->requestee, stat->onetlb, stat->alltlb,
472 stat->s_retry, stat->d_retry, stat->ptc_i);
473 seq_printf(file, "%lx %ld %ld %ld %ld %ld %ld\n",
474 uv_read_global_mmr64(uv_blade_to_pnode
475 (uv_cpu_to_blade_id(cpu)),
476 UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE),
477 stat->sflush, stat->dflush,
478 stat->retriesok, stat->nomsg,
479 stat->multmsg, stat->ntargeted);
480 }
481
482 return 0;
483}
484
485/*
 486 * A write of 0 displays the meaning of the statistics.
 487 * A write of >0 sets the timeout retry limit.
488 */
489static ssize_t uv_ptc_proc_write(struct file *file, const char __user *user,
490 size_t count, loff_t *data)
491{
492 long newmode;
493 char optstr[64];
494
495 if (count == 0 || count > sizeof(optstr))
496 return -EINVAL;
497 if (copy_from_user(optstr, user, count))
498 return -EFAULT;
499 optstr[count - 1] = '\0';
500 if (strict_strtoul(optstr, 10, &newmode) < 0) {
501 printk(KERN_DEBUG "%s is invalid\n", optstr);
502 return -EINVAL;
503 }
504
505 if (newmode == 0) {
506 printk(KERN_DEBUG "# cpu: cpu number\n");
507 printk(KERN_DEBUG
508 "requestor: times this cpu was the flush requestor\n");
509 printk(KERN_DEBUG
510 "requestee: times this cpu was requested to flush its TLBs\n");
511 printk(KERN_DEBUG
512 "one: times requested to flush a single address\n");
513 printk(KERN_DEBUG
514 "all: times requested to flush all TLB's\n");
515 printk(KERN_DEBUG
516 "sretry: number of retries of source-side timeouts\n");
517 printk(KERN_DEBUG
518 "dretry: number of retries of destination-side timeouts\n");
519 printk(KERN_DEBUG
520 "ptc_i: times UV fell through to IPI-style flushes\n");
521 printk(KERN_DEBUG
522 "sw_ack: image of UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE\n");
523 printk(KERN_DEBUG
524 "sflush_us: cycles spent in uv_flush_tlb_others()\n");
525 printk(KERN_DEBUG
526 "dflush_us: cycles spent in handling flush requests\n");
527 printk(KERN_DEBUG "sok: successes on retry\n");
528 printk(KERN_DEBUG "dnomsg: interrupts with no message\n");
529 printk(KERN_DEBUG
530 "dmult: interrupts with multiple messages\n");
531 printk(KERN_DEBUG "starget: nodes targeted\n");
532 } else {
533 uv_bau_retry_limit = newmode;
534 printk(KERN_DEBUG "timeout retry limit:%d\n",
535 uv_bau_retry_limit);
536 }
537
538 return count;
539}
540
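Assuming UV_PTC_BASENAME resolves to /proc/sgi_uv/ptc_statistics (the define itself is not shown in this hunk), the interface can be exercised from user space along these lines; note the trailing newline, which the write handler strips before parsing:

#include <stdio.h>

int main(void)
{
	char line[256];
	FILE *f = fopen("/proc/sgi_uv/ptc_statistics", "r");

	if (f) {                            /* dump per-cpu statistics */
		while (fgets(line, sizeof(line), f))
			fputs(line, stdout);
		fclose(f);
	}
	f = fopen("/proc/sgi_uv/ptc_statistics", "w");
	if (f) {
		fputs("2\n", f);            /* >0: set the retry limit */
		fclose(f);
	}
	return 0;
}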
541static const struct seq_operations uv_ptc_seq_ops = {
542 .start = uv_ptc_seq_start,
543 .next = uv_ptc_seq_next,
544 .stop = uv_ptc_seq_stop,
545 .show = uv_ptc_seq_show
546};
547
548static int uv_ptc_proc_open(struct inode *inode, struct file *file)
549{
550 return seq_open(file, &uv_ptc_seq_ops);
551}
552
553static const struct file_operations proc_uv_ptc_operations = {
554 .open = uv_ptc_proc_open,
555 .read = seq_read,
556 .write = uv_ptc_proc_write,
557 .llseek = seq_lseek,
558 .release = seq_release,
559};
560
561static int __init uv_ptc_init(void)
562{
563 struct proc_dir_entry *proc_uv_ptc;
564
565 if (!is_uv_system())
566 return 0;
567
568 if (!proc_mkdir("sgi_uv", NULL))
569 return -EINVAL;
570
571 proc_uv_ptc = create_proc_entry(UV_PTC_BASENAME, 0444, NULL);
572 if (!proc_uv_ptc) {
573 printk(KERN_ERR "unable to create %s proc entry\n",
574 UV_PTC_BASENAME);
575 remove_proc_entry("sgi_uv", NULL);
576 return -EINVAL;
577 }
578 proc_uv_ptc->proc_fops = &proc_uv_ptc_operations;
579 return 0;
580}
581
582/*
583 * begin the initialization of the per-blade control structures
584 */
585static struct bau_control * __init uv_table_bases_init(int blade, int node)
586{
587 int i;
588 int *ip;
589 struct bau_msg_status *msp;
590 struct bau_control *bau_tabp;
591
592 bau_tabp =
593 kmalloc_node(sizeof(struct bau_control), GFP_KERNEL, node);
594 BUG_ON(!bau_tabp);
595
596 bau_tabp->msg_statuses =
597 kmalloc_node(sizeof(struct bau_msg_status) *
598 DEST_Q_SIZE, GFP_KERNEL, node);
599 BUG_ON(!bau_tabp->msg_statuses);
600
601 for (i = 0, msp = bau_tabp->msg_statuses; i < DEST_Q_SIZE; i++, msp++)
602 bau_cpubits_clear(&msp->seen_by, (int)
603 uv_blade_nr_possible_cpus(blade));
604
605 bau_tabp->watching =
606 kmalloc_node(sizeof(int) * DEST_NUM_RESOURCES, GFP_KERNEL, node);
607 BUG_ON(!bau_tabp->watching);
608
609 for (i = 0, ip = bau_tabp->watching; i < DEST_Q_SIZE; i++, ip++)
610 *ip = 0;
611
612 uv_bau_table_bases[blade] = bau_tabp;
613
614 return bau_tabp;
615}
616
617/*
618 * finish the initialization of the per-blade control structures
619 */
620static void __init
621uv_table_bases_finish(int blade, int node, int cur_cpu,
622 struct bau_control *bau_tablesp,
623 struct bau_desc *adp)
624{
625 struct bau_control *bcp;
626 int i;
627
628 for (i = cur_cpu; i < cur_cpu + uv_blade_nr_possible_cpus(blade); i++) {
629 bcp = (struct bau_control *)&per_cpu(bau_control, i);
630
631 bcp->bau_msg_head = bau_tablesp->va_queue_first;
632 bcp->va_queue_first = bau_tablesp->va_queue_first;
633 bcp->va_queue_last = bau_tablesp->va_queue_last;
634 bcp->watching = bau_tablesp->watching;
635 bcp->msg_statuses = bau_tablesp->msg_statuses;
636 bcp->descriptor_base = adp;
637 }
638}
639
640/*
641 * initialize the sending side's sending buffers
642 */
643static struct bau_desc * __init
644uv_activation_descriptor_init(int node, int pnode)
645{
646 int i;
647 unsigned long pa;
648 unsigned long m;
649 unsigned long n;
650 unsigned long mmr_image;
651 struct bau_desc *adp;
652 struct bau_desc *ad2;
653
654 adp = (struct bau_desc *)
655 kmalloc_node(16384, GFP_KERNEL, node);
656 BUG_ON(!adp);
657
658 pa = __pa((unsigned long)adp);
659 n = pa >> uv_nshift;
660 m = pa & uv_mmask;
661
662 mmr_image = uv_read_global_mmr64(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE);
663 if (mmr_image) {
664 uv_write_global_mmr64(pnode, (unsigned long)
665 UVH_LB_BAU_SB_DESCRIPTOR_BASE,
666 (n << UV_DESC_BASE_PNODE_SHIFT | m));
667 }
668
669 for (i = 0, ad2 = adp; i < UV_ACTIVATION_DESCRIPTOR_SIZE; i++, ad2++) {
670 memset(ad2, 0, sizeof(struct bau_desc));
671 ad2->header.sw_ack_flag = 1;
672 ad2->header.base_dest_nodeid =
673 uv_blade_to_pnode(uv_cpu_to_blade_id(0));
674 ad2->header.command = UV_NET_ENDPOINT_INTD;
675 ad2->header.int_both = 1;
676 /*
677 * all others need to be set to zero:
678 * fairness chaining multilevel count replied_to
679 */
680 }
681 return adp;
682}
683
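The pa/n/m arithmetic above splits a physical address into a node number (upper bits) and an on-node offset (lower bits), then recombines them into the value programmed into the descriptor-base MMR. The same computation as a pure function (a sketch; the shift widths are machine-specific):

unsigned long make_descriptor_base(unsigned long pa,
				   unsigned int nshift,
				   unsigned int pnode_shift)
{
	unsigned long mmask = (1UL << nshift) - 1;
	unsigned long n = pa >> nshift;     /* node bits */
	unsigned long m = pa & mmask;       /* offset within the node */

	return (n << pnode_shift) | m;      /* MMR image */
}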
684/*
685 * initialize the destination side's receiving buffers
686 */
687static struct bau_payload_queue_entry * __init
688uv_payload_queue_init(int node, int pnode, struct bau_control *bau_tablesp)
689{
690 struct bau_payload_queue_entry *pqp;
691 char *cp;
692
693 pqp = (struct bau_payload_queue_entry *) kmalloc_node(
694 (DEST_Q_SIZE + 1) * sizeof(struct bau_payload_queue_entry),
695 GFP_KERNEL, node);
696 BUG_ON(!pqp);
697
698 cp = (char *)pqp + 31;
699 pqp = (struct bau_payload_queue_entry *)(((unsigned long)cp >> 5) << 5);
700 bau_tablesp->va_queue_first = pqp;
701 uv_write_global_mmr64(pnode,
702 UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST,
703 ((unsigned long)pnode <<
704 UV_PAYLOADQ_PNODE_SHIFT) |
705 uv_physnodeaddr(pqp));
706 uv_write_global_mmr64(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL,
707 uv_physnodeaddr(pqp));
708 bau_tablesp->va_queue_last = pqp + (DEST_Q_SIZE - 1);
709 uv_write_global_mmr64(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST,
710 (unsigned long)
711 uv_physnodeaddr(bau_tablesp->va_queue_last));
712 memset(pqp, 0, sizeof(struct bau_payload_queue_entry) * DEST_Q_SIZE);
713
714 return pqp;
715}
716
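The two-line pointer dance after kmalloc_node() rounds the queue base up to a 32-byte boundary; over-allocating by one entry guarantees the rounded pointer stays inside the buffer. The trick in isolation (a user-space sketch; the original pointer is discarded here, so a real allocator would have to remember it for freeing):

#include <stdlib.h>

void *alloc_aligned32(size_t size)
{
	/* over-allocate so rounding up cannot run past the end */
	char *raw = malloc(size + 31);

	if (!raw)
		return NULL;
	/* round up: add 31, then clear the low 5 bits (>> 5 << 5) */
	return (void *)(((unsigned long)raw + 31) & ~31UL);
}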
717/*
718 * Initialization of each UV blade's structures
719 */
720static int __init uv_init_blade(int blade, int node, int cur_cpu)
721{
722 int pnode;
723 unsigned long pa;
724 unsigned long apicid;
725 struct bau_desc *adp;
726 struct bau_payload_queue_entry *pqp;
727 struct bau_control *bau_tablesp;
728
729 bau_tablesp = uv_table_bases_init(blade, node);
730 pnode = uv_blade_to_pnode(blade);
731 adp = uv_activation_descriptor_init(node, pnode);
732 pqp = uv_payload_queue_init(node, pnode, bau_tablesp);
733 uv_table_bases_finish(blade, node, cur_cpu, bau_tablesp, adp);
734 /*
 735 * The initialization below cannot be done in firmware because the
 736 * messaging IRQ is determined by the OS.
737 */
738 apicid = per_cpu(x86_cpu_to_apicid, cur_cpu);
739 pa = uv_read_global_mmr64(pnode, UVH_BAU_DATA_CONFIG);
740 if ((pa & 0xff) != UV_BAU_MESSAGE) {
741 uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG,
742 ((apicid << 32) | UV_BAU_MESSAGE));
743 }
744 return 0;
745}
746
747/*
748 * Initialization of BAU-related structures
749 */
750static int __init uv_bau_init(void)
751{
752 int blade;
753 int node;
754 int nblades;
755 int last_blade;
756 int cur_cpu = 0;
757
758 if (!is_uv_system())
759 return 0;
760
761 uv_bau_retry_limit = 1;
762 uv_nshift = uv_hub_info->n_val;
763 uv_mmask = (1UL << uv_hub_info->n_val) - 1;
764 nblades = 0;
765 last_blade = -1;
766 for_each_online_node(node) {
767 blade = uv_node_to_blade_id(node);
768 if (blade == last_blade)
769 continue;
770 last_blade = blade;
771 nblades++;
772 }
773 uv_bau_table_bases = (struct bau_control **)
774 kmalloc(nblades * sizeof(struct bau_control *), GFP_KERNEL);
775 BUG_ON(!uv_bau_table_bases);
776
777 last_blade = -1;
778 for_each_online_node(node) {
779 blade = uv_node_to_blade_id(node);
780 if (blade == last_blade)
781 continue;
782 last_blade = blade;
783 uv_init_blade(blade, node, cur_cpu);
784 cur_cpu += uv_blade_nr_possible_cpus(blade);
785 }
786 set_intr_gate(UV_BAU_MESSAGE, uv_bau_message_intr1);
787 uv_enable_timeouts();
788
789 return 0;
790}
791__initcall(uv_bau_init);
792__initcall(uv_ptc_init);
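Both passes in uv_bau_init() count blades with the same idiom: since the nodes of one blade are enumerated consecutively by for_each_online_node(), remembering only the previous blade id is enough to de-duplicate. As a standalone sketch under that same assumption:

int count_blades(const int *node_to_blade, int nnodes)
{
	int i;
	int nblades = 0;
	int last = -1;

	for (i = 0; i < nnodes; i++) {
		if (node_to_blade[i] == last)   /* same blade as before */
			continue;
		last = node_to_blade[i];
		nblades++;
	}
	return nblades;
}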
diff --git a/arch/x86/kernel/trampoline.c b/arch/x86/kernel/trampoline.c
index abbf199adebb..1106fac6024d 100644
--- a/arch/x86/kernel/trampoline.c
+++ b/arch/x86/kernel/trampoline.c
@@ -2,7 +2,7 @@
2 2
3#include <asm/trampoline.h> 3#include <asm/trampoline.h>
4 4
5/* ready for x86_64, no harm for x86, since it will overwrite after alloc */ 5/* ready for x86_64 and x86 */
6unsigned char *trampoline_base = __va(TRAMPOLINE_BASE); 6unsigned char *trampoline_base = __va(TRAMPOLINE_BASE);
7 7
8/* 8/*
diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c
index 08d752de4eee..8a768973c4f0 100644
--- a/arch/x86/kernel/traps_32.c
+++ b/arch/x86/kernel/traps_32.c
@@ -1,5 +1,6 @@
1/* 1/*
2 * Copyright (C) 1991, 1992 Linus Torvalds 2 * Copyright (C) 1991, 1992 Linus Torvalds
3 * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
3 * 4 *
4 * Pentium III FXSR, SSE support 5 * Pentium III FXSR, SSE support
5 * Gareth Hughes <gareth@valinux.com>, May 2000 6 * Gareth Hughes <gareth@valinux.com>, May 2000
@@ -60,8 +61,6 @@
60 61
61#include "mach_traps.h" 62#include "mach_traps.h"
62 63
63int panic_on_unrecovered_nmi;
64
65DECLARE_BITMAP(used_vectors, NR_VECTORS); 64DECLARE_BITMAP(used_vectors, NR_VECTORS);
66EXPORT_SYMBOL_GPL(used_vectors); 65EXPORT_SYMBOL_GPL(used_vectors);
67 66
@@ -98,19 +97,22 @@ asmlinkage void alignment_check(void);
98asmlinkage void spurious_interrupt_bug(void); 97asmlinkage void spurious_interrupt_bug(void);
99asmlinkage void machine_check(void); 98asmlinkage void machine_check(void);
100 99
100int panic_on_unrecovered_nmi;
101int kstack_depth_to_print = 24; 101int kstack_depth_to_print = 24;
102static unsigned int code_bytes = 64; 102static unsigned int code_bytes = 64;
103static int ignore_nmis;
104static int die_counter;
103 105
104void printk_address(unsigned long address, int reliable) 106void printk_address(unsigned long address, int reliable)
105{ 107{
106#ifdef CONFIG_KALLSYMS 108#ifdef CONFIG_KALLSYMS
107 char namebuf[KSYM_NAME_LEN];
108 unsigned long offset = 0; 109 unsigned long offset = 0;
109 unsigned long symsize; 110 unsigned long symsize;
110 const char *symname; 111 const char *symname;
111 char reliab[4] = "";
112 char *delim = ":";
113 char *modname; 112 char *modname;
113 char *delim = ":";
114 char namebuf[KSYM_NAME_LEN];
115 char reliab[4] = "";
114 116
115 symname = kallsyms_lookup(address, &symsize, &offset, 117 symname = kallsyms_lookup(address, &symsize, &offset,
116 &modname, namebuf); 118 &modname, namebuf);
@@ -130,22 +132,23 @@ void printk_address(unsigned long address, int reliable)
130#endif 132#endif
131} 133}
132 134
133static inline int valid_stack_ptr(struct thread_info *tinfo, void *p, unsigned size) 135static inline int valid_stack_ptr(struct thread_info *tinfo,
136 void *p, unsigned int size)
134{ 137{
135 return p > (void *)tinfo && 138 void *t = tinfo;
136 p <= (void *)tinfo + THREAD_SIZE - size; 139 return p > t && p <= t + THREAD_SIZE - size;
137} 140}
138 141
139/* The form of the top of the frame on the stack */ 142/* The form of the top of the frame on the stack */
140struct stack_frame { 143struct stack_frame {
141 struct stack_frame *next_frame; 144 struct stack_frame *next_frame;
142 unsigned long return_address; 145 unsigned long return_address;
143}; 146};
144 147
145static inline unsigned long 148static inline unsigned long
146print_context_stack(struct thread_info *tinfo, 149print_context_stack(struct thread_info *tinfo,
147 unsigned long *stack, unsigned long bp, 150 unsigned long *stack, unsigned long bp,
148 const struct stacktrace_ops *ops, void *data) 151 const struct stacktrace_ops *ops, void *data)
149{ 152{
150 struct stack_frame *frame = (struct stack_frame *)bp; 153 struct stack_frame *frame = (struct stack_frame *)bp;
151 154
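The reworked valid_stack_ptr() admits a pointer only if it lies above the thread_info header at the stack bottom and leaves 'size' bytes of room before the stack top. Restated standalone (the THREAD_SIZE value is an assumption):

#define THREAD_SIZE 8192        /* typical 32-bit x86 value; assumption */

static int valid_stack_ptr(void *stack_base, void *p, unsigned int size)
{
	char *t = stack_base;   /* thread_info lives at the stack bottom */

	return (char *)p > t && (char *)p <= t + THREAD_SIZE - size;
}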
@@ -167,8 +170,6 @@ print_context_stack(struct thread_info *tinfo,
167 return bp; 170 return bp;
168} 171}
169 172
170#define MSG(msg) ops->warning(data, msg)
171
172void dump_trace(struct task_struct *task, struct pt_regs *regs, 173void dump_trace(struct task_struct *task, struct pt_regs *regs,
173 unsigned long *stack, unsigned long bp, 174 unsigned long *stack, unsigned long bp,
174 const struct stacktrace_ops *ops, void *data) 175 const struct stacktrace_ops *ops, void *data)
@@ -178,7 +179,6 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
178 179
179 if (!stack) { 180 if (!stack) {
180 unsigned long dummy; 181 unsigned long dummy;
181
182 stack = &dummy; 182 stack = &dummy;
183 if (task != current) 183 if (task != current)
184 stack = (unsigned long *)task->thread.sp; 184 stack = (unsigned long *)task->thread.sp;
@@ -196,7 +196,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
196 } 196 }
197#endif 197#endif
198 198
199 while (1) { 199 for (;;) {
200 struct thread_info *context; 200 struct thread_info *context;
201 201
202 context = (struct thread_info *) 202 context = (struct thread_info *)
@@ -248,10 +248,10 @@ static void print_trace_address(void *data, unsigned long addr, int reliable)
248} 248}
249 249
250static const struct stacktrace_ops print_trace_ops = { 250static const struct stacktrace_ops print_trace_ops = {
251 .warning = print_trace_warning, 251 .warning = print_trace_warning,
252 .warning_symbol = print_trace_warning_symbol, 252 .warning_symbol = print_trace_warning_symbol,
253 .stack = print_trace_stack, 253 .stack = print_trace_stack,
254 .address = print_trace_address, 254 .address = print_trace_address,
255}; 255};
256 256
257static void 257static void
@@ -351,15 +351,14 @@ void show_registers(struct pt_regs *regs)
351 printk(KERN_EMERG "Code: "); 351 printk(KERN_EMERG "Code: ");
352 352
353 ip = (u8 *)regs->ip - code_prologue; 353 ip = (u8 *)regs->ip - code_prologue;
354 if (ip < (u8 *)PAGE_OFFSET || 354 if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) {
355 probe_kernel_address(ip, c)) {
356 /* try starting at EIP */ 355 /* try starting at EIP */
357 ip = (u8 *)regs->ip; 356 ip = (u8 *)regs->ip;
358 code_len = code_len - code_prologue + 1; 357 code_len = code_len - code_prologue + 1;
359 } 358 }
360 for (i = 0; i < code_len; i++, ip++) { 359 for (i = 0; i < code_len; i++, ip++) {
361 if (ip < (u8 *)PAGE_OFFSET || 360 if (ip < (u8 *)PAGE_OFFSET ||
362 probe_kernel_address(ip, c)) { 361 probe_kernel_address(ip, c)) {
363 printk(" Bad EIP value."); 362 printk(" Bad EIP value.");
364 break; 363 break;
365 } 364 }
@@ -384,8 +383,6 @@ int is_valid_bugaddr(unsigned long ip)
384 return ud2 == 0x0b0f; 383 return ud2 == 0x0b0f;
385} 384}
386 385
387static int die_counter;
388
389int __kprobes __die(const char *str, struct pt_regs *regs, long err) 386int __kprobes __die(const char *str, struct pt_regs *regs, long err)
390{ 387{
391 unsigned short ss; 388 unsigned short ss;
@@ -402,26 +399,22 @@ int __kprobes __die(const char *str, struct pt_regs *regs, long err)
402 printk("DEBUG_PAGEALLOC"); 399 printk("DEBUG_PAGEALLOC");
403#endif 400#endif
404 printk("\n"); 401 printk("\n");
405
406 if (notify_die(DIE_OOPS, str, regs, err, 402 if (notify_die(DIE_OOPS, str, regs, err,
407 current->thread.trap_no, SIGSEGV) != NOTIFY_STOP) { 403 current->thread.trap_no, SIGSEGV) == NOTIFY_STOP)
408 404 return 1;
409 show_registers(regs);
410 /* Executive summary in case the oops scrolled away */
411 sp = (unsigned long) (&regs->sp);
412 savesegment(ss, ss);
413 if (user_mode(regs)) {
414 sp = regs->sp;
415 ss = regs->ss & 0xffff;
416 }
417 printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip);
418 print_symbol("%s", regs->ip);
419 printk(" SS:ESP %04x:%08lx\n", ss, sp);
420 405
421 return 0; 406 show_registers(regs);
407 /* Executive summary in case the oops scrolled away */
408 sp = (unsigned long) (&regs->sp);
409 savesegment(ss, ss);
410 if (user_mode(regs)) {
411 sp = regs->sp;
412 ss = regs->ss & 0xffff;
422 } 413 }
423 414 printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip);
424 return 1; 415 print_symbol("%s", regs->ip);
416 printk(" SS:ESP %04x:%08lx\n", ss, sp);
417 return 0;
425} 418}
426 419
427/* 420/*
@@ -546,7 +539,7 @@ void do_##name(struct pt_regs *regs, long error_code) \
546{ \ 539{ \
547 trace_hardirqs_fixup(); \ 540 trace_hardirqs_fixup(); \
548 if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ 541 if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
549 == NOTIFY_STOP) \ 542 == NOTIFY_STOP) \
550 return; \ 543 return; \
551 do_trap(trapnr, signr, str, 0, regs, error_code, NULL); \ 544 do_trap(trapnr, signr, str, 0, regs, error_code, NULL); \
552} 545}
@@ -562,7 +555,7 @@ void do_##name(struct pt_regs *regs, long error_code) \
562 info.si_code = sicode; \ 555 info.si_code = sicode; \
563 info.si_addr = (void __user *)siaddr; \ 556 info.si_addr = (void __user *)siaddr; \
564 if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ 557 if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
565 == NOTIFY_STOP) \ 558 == NOTIFY_STOP) \
566 return; \ 559 return; \
567 do_trap(trapnr, signr, str, 0, regs, error_code, &info); \ 560 do_trap(trapnr, signr, str, 0, regs, error_code, &info); \
568} 561}
@@ -571,7 +564,7 @@ void do_##name(struct pt_regs *regs, long error_code) \
571void do_##name(struct pt_regs *regs, long error_code) \ 564void do_##name(struct pt_regs *regs, long error_code) \
572{ \ 565{ \
573 if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ 566 if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
574 == NOTIFY_STOP) \ 567 == NOTIFY_STOP) \
575 return; \ 568 return; \
576 do_trap(trapnr, signr, str, 1, regs, error_code, NULL); \ 569 do_trap(trapnr, signr, str, 1, regs, error_code, NULL); \
577} 570}
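For orientation, an invocation such as DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS) from the list below expands to roughly this handler (a reformatted sketch of the preprocessor output):

void do_invalid_TSS(struct pt_regs *regs, long error_code)
{
	if (notify_die(DIE_TRAP, "invalid TSS", regs, error_code,
		       10, SIGSEGV) == NOTIFY_STOP)
		return;
	do_trap(10, SIGSEGV, "invalid TSS", 1, regs, error_code, NULL);
}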
@@ -586,27 +579,29 @@ void do_##name(struct pt_regs *regs, long error_code) \
586 info.si_addr = (void __user *)siaddr; \ 579 info.si_addr = (void __user *)siaddr; \
587 trace_hardirqs_fixup(); \ 580 trace_hardirqs_fixup(); \
588 if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ 581 if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
589 == NOTIFY_STOP) \ 582 == NOTIFY_STOP) \
590 return; \ 583 return; \
591 do_trap(trapnr, signr, str, 1, regs, error_code, &info); \ 584 do_trap(trapnr, signr, str, 1, regs, error_code, &info); \
592} 585}
593 586
594DO_VM86_ERROR_INFO(0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip) 587DO_VM86_ERROR_INFO(0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip)
595#ifndef CONFIG_KPROBES 588#ifndef CONFIG_KPROBES
596DO_VM86_ERROR(3, SIGTRAP, "int3", int3) 589DO_VM86_ERROR(3, SIGTRAP, "int3", int3)
597#endif 590#endif
598DO_VM86_ERROR(4, SIGSEGV, "overflow", overflow) 591DO_VM86_ERROR(4, SIGSEGV, "overflow", overflow)
599DO_VM86_ERROR(5, SIGSEGV, "bounds", bounds) 592DO_VM86_ERROR(5, SIGSEGV, "bounds", bounds)
600DO_ERROR_INFO(6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip, 0) 593DO_ERROR_INFO(6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip, 0)
601DO_ERROR(9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun) 594DO_ERROR(9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun)
602DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS) 595DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS)
603DO_ERROR(11, SIGBUS, "segment not present", segment_not_present) 596DO_ERROR(11, SIGBUS, "segment not present", segment_not_present)
604DO_ERROR(12, SIGBUS, "stack segment", stack_segment) 597DO_ERROR(12, SIGBUS, "stack segment", stack_segment)
605DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0, 0) 598DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0, 0)
606DO_ERROR_INFO(32, SIGILL, "iret exception", iret_error, ILL_BADSTK, 0, 1) 599DO_ERROR_INFO(32, SIGILL, "iret exception", iret_error, ILL_BADSTK, 0, 1)
607 600
608void __kprobes do_general_protection(struct pt_regs *regs, long error_code) 601void __kprobes
602do_general_protection(struct pt_regs *regs, long error_code)
609{ 603{
604 struct task_struct *tsk;
610 struct thread_struct *thread; 605 struct thread_struct *thread;
611 struct tss_struct *tss; 606 struct tss_struct *tss;
612 int cpu; 607 int cpu;
@@ -647,23 +642,24 @@ void __kprobes do_general_protection(struct pt_regs *regs, long error_code)
647 if (regs->flags & X86_VM_MASK) 642 if (regs->flags & X86_VM_MASK)
648 goto gp_in_vm86; 643 goto gp_in_vm86;
649 644
645 tsk = current;
650 if (!user_mode(regs)) 646 if (!user_mode(regs))
651 goto gp_in_kernel; 647 goto gp_in_kernel;
652 648
653 current->thread.error_code = error_code; 649 tsk->thread.error_code = error_code;
654 current->thread.trap_no = 13; 650 tsk->thread.trap_no = 13;
655 651
656 if (show_unhandled_signals && unhandled_signal(current, SIGSEGV) && 652 if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
657 printk_ratelimit()) { 653 printk_ratelimit()) {
658 printk(KERN_INFO 654 printk(KERN_INFO
659 "%s[%d] general protection ip:%lx sp:%lx error:%lx", 655 "%s[%d] general protection ip:%lx sp:%lx error:%lx",
660 current->comm, task_pid_nr(current), 656 tsk->comm, task_pid_nr(tsk),
661 regs->ip, regs->sp, error_code); 657 regs->ip, regs->sp, error_code);
662 print_vma_addr(" in ", regs->ip); 658 print_vma_addr(" in ", regs->ip);
663 printk("\n"); 659 printk("\n");
664 } 660 }
665 661
666 force_sig(SIGSEGV, current); 662 force_sig(SIGSEGV, tsk);
667 return; 663 return;
668 664
669gp_in_vm86: 665gp_in_vm86:
@@ -672,14 +668,15 @@ gp_in_vm86:
672 return; 668 return;
673 669
674gp_in_kernel: 670gp_in_kernel:
675 if (!fixup_exception(regs)) { 671 if (fixup_exception(regs))
676 current->thread.error_code = error_code; 672 return;
677 current->thread.trap_no = 13; 673
678 if (notify_die(DIE_GPF, "general protection fault", regs, 674 tsk->thread.error_code = error_code;
675 tsk->thread.trap_no = 13;
676 if (notify_die(DIE_GPF, "general protection fault", regs,
679 error_code, 13, SIGSEGV) == NOTIFY_STOP) 677 error_code, 13, SIGSEGV) == NOTIFY_STOP)
680 return; 678 return;
681 die("general protection fault", regs, error_code); 679 die("general protection fault", regs, error_code);
682 }
683} 680}
684 681
685static notrace __kprobes void 682static notrace __kprobes void
@@ -756,9 +753,9 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
756 753
757static DEFINE_SPINLOCK(nmi_print_lock); 754static DEFINE_SPINLOCK(nmi_print_lock);
758 755
759void notrace __kprobes die_nmi(struct pt_regs *regs, const char *msg) 756void notrace __kprobes die_nmi(char *str, struct pt_regs *regs, int do_panic)
760{ 757{
761 if (notify_die(DIE_NMIWATCHDOG, msg, regs, 0, 2, SIGINT) == NOTIFY_STOP) 758 if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP)
762 return; 759 return;
763 760
764 spin_lock(&nmi_print_lock); 761 spin_lock(&nmi_print_lock);
@@ -767,10 +764,12 @@ void notrace __kprobes die_nmi(struct pt_regs *regs, const char *msg)
767 * to get a message out: 764 * to get a message out:
768 */ 765 */
769 bust_spinlocks(1); 766 bust_spinlocks(1);
770 printk(KERN_EMERG "%s", msg); 767 printk(KERN_EMERG "%s", str);
771 printk(" on CPU%d, ip %08lx, registers:\n", 768 printk(" on CPU%d, ip %08lx, registers:\n",
772 smp_processor_id(), regs->ip); 769 smp_processor_id(), regs->ip);
773 show_registers(regs); 770 show_registers(regs);
771 if (do_panic)
772 panic("Non maskable interrupt");
774 console_silent(); 773 console_silent();
775 spin_unlock(&nmi_print_lock); 774 spin_unlock(&nmi_print_lock);
776 bust_spinlocks(0); 775 bust_spinlocks(0);
@@ -790,14 +789,17 @@ void notrace __kprobes die_nmi(struct pt_regs *regs, const char *msg)
790static notrace __kprobes void default_do_nmi(struct pt_regs *regs) 789static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
791{ 790{
792 unsigned char reason = 0; 791 unsigned char reason = 0;
792 int cpu;
793
794 cpu = smp_processor_id();
793 795
794 /* Only the BSP gets external NMIs from the system: */ 796 /* Only the BSP gets external NMIs from the system. */
795 if (!smp_processor_id()) 797 if (!cpu)
796 reason = get_nmi_reason(); 798 reason = get_nmi_reason();
797 799
798 if (!(reason & 0xc0)) { 800 if (!(reason & 0xc0)) {
799 if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT) 801 if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT)
800 == NOTIFY_STOP) 802 == NOTIFY_STOP)
801 return; 803 return;
802#ifdef CONFIG_X86_LOCAL_APIC 804#ifdef CONFIG_X86_LOCAL_APIC
803 /* 805 /*
@@ -806,7 +808,7 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
806 */ 808 */
807 if (nmi_watchdog_tick(regs, reason)) 809 if (nmi_watchdog_tick(regs, reason))
808 return; 810 return;
809 if (!do_nmi_callback(regs, smp_processor_id())) 811 if (!do_nmi_callback(regs, cpu))
810 unknown_nmi_error(reason, regs); 812 unknown_nmi_error(reason, regs);
811#else 813#else
812 unknown_nmi_error(reason, regs); 814 unknown_nmi_error(reason, regs);
@@ -816,6 +818,8 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
816 } 818 }
817 if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) 819 if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
818 return; 820 return;
821
822 /* AK: following checks seem to be broken on modern chipsets. FIXME */
819 if (reason & 0x80) 823 if (reason & 0x80)
820 mem_parity_error(reason, regs); 824 mem_parity_error(reason, regs);
821 if (reason & 0x40) 825 if (reason & 0x40)
@@ -827,8 +831,6 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
827 reassert_nmi(); 831 reassert_nmi();
828} 832}
829 833
830static int ignore_nmis;
831
832notrace __kprobes void do_nmi(struct pt_regs *regs, long error_code) 834notrace __kprobes void do_nmi(struct pt_regs *regs, long error_code)
833{ 835{
834 int cpu; 836 int cpu;
@@ -913,7 +915,7 @@ void __kprobes do_debug(struct pt_regs *regs, long error_code)
913 tsk->thread.debugctlmsr = 0; 915 tsk->thread.debugctlmsr = 0;
914 916
915 if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code, 917 if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code,
916 SIGTRAP) == NOTIFY_STOP) 918 SIGTRAP) == NOTIFY_STOP)
917 return; 919 return;
918 /* It's safe to allow irq's after DR6 has been saved */ 920 /* It's safe to allow irq's after DR6 has been saved */
919 if (regs->flags & X86_EFLAGS_IF) 921 if (regs->flags & X86_EFLAGS_IF)
@@ -974,9 +976,8 @@ clear_TF_reenable:
974void math_error(void __user *ip) 976void math_error(void __user *ip)
975{ 977{
976 struct task_struct *task; 978 struct task_struct *task;
977 unsigned short cwd;
978 unsigned short swd;
979 siginfo_t info; 979 siginfo_t info;
980 unsigned short cwd, swd;
980 981
981 /* 982 /*
982 * Save the info for the exception handler and clear the error. 983 * Save the info for the exception handler and clear the error.
@@ -995,7 +996,7 @@ void math_error(void __user *ip)
995 * C1 reg you need in case of a stack fault, 0x040 is the stack 996 * C1 reg you need in case of a stack fault, 0x040 is the stack
996 * fault bit. We should only be taking one exception at a time, 997 * fault bit. We should only be taking one exception at a time,
997 * so if this combination doesn't produce any single exception, 998 * so if this combination doesn't produce any single exception,
998 * then we have a bad program that isn't syncronizing its FPU usage 999 * then we have a bad program that isn't synchronizing its FPU usage
999 * and it will suffer the consequences since we won't be able to 1000 * and it will suffer the consequences since we won't be able to
1000 * fully reproduce the context of the exception 1001 * fully reproduce the context of the exception
1001 */ 1002 */
@@ -1004,7 +1005,7 @@ void math_error(void __user *ip)
1004 switch (swd & ~cwd & 0x3f) { 1005 switch (swd & ~cwd & 0x3f) {
1005 case 0x000: /* No unmasked exception */ 1006 case 0x000: /* No unmasked exception */
1006 return; 1007 return;
1007 default: /* Multiple exceptions */ 1008 default: /* Multiple exceptions */
1008 break; 1009 break;
1009 case 0x001: /* Invalid Op */ 1010 case 0x001: /* Invalid Op */
1010 /* 1011 /*
@@ -1040,8 +1041,8 @@ void do_coprocessor_error(struct pt_regs *regs, long error_code)
1040static void simd_math_error(void __user *ip) 1041static void simd_math_error(void __user *ip)
1041{ 1042{
1042 struct task_struct *task; 1043 struct task_struct *task;
1043 unsigned short mxcsr;
1044 siginfo_t info; 1044 siginfo_t info;
1045 unsigned short mxcsr;
1045 1046
1046 /* 1047 /*
1047 * Save the info for the exception handler and clear the error. 1048 * Save the info for the exception handler and clear the error.
@@ -1117,7 +1118,7 @@ void do_spurious_interrupt_bug(struct pt_regs *regs, long error_code)
1117 1118
1118unsigned long patch_espfix_desc(unsigned long uesp, unsigned long kesp) 1119unsigned long patch_espfix_desc(unsigned long uesp, unsigned long kesp)
1119{ 1120{
1120 struct desc_struct *gdt = __get_cpu_var(gdt_page).gdt; 1121 struct desc_struct *gdt = get_cpu_gdt_table(smp_processor_id());
1121 unsigned long base = (kesp - uesp) & -THREAD_SIZE; 1122 unsigned long base = (kesp - uesp) & -THREAD_SIZE;
1122 unsigned long new_kesp = kesp - base; 1123 unsigned long new_kesp = kesp - base;
1123 unsigned long lim_pages = (new_kesp | (THREAD_SIZE - 1)) >> PAGE_SHIFT; 1124 unsigned long lim_pages = (new_kesp | (THREAD_SIZE - 1)) >> PAGE_SHIFT;
@@ -1196,19 +1197,16 @@ void __init trap_init(void)
1196 early_iounmap(p, 4); 1197 early_iounmap(p, 4);
1197#endif 1198#endif
1198 1199
1199#ifdef CONFIG_X86_LOCAL_APIC 1200 set_trap_gate(0, &divide_error);
1200 init_apic_mappings(); 1201 set_intr_gate(1, &debug);
1201#endif 1202 set_intr_gate(2, &nmi);
1202 set_trap_gate(0, &divide_error); 1203 set_system_intr_gate(3, &int3); /* int3 can be called from all */
1203 set_intr_gate(1, &debug); 1204 set_system_gate(4, &overflow); /* int4 can be called from all */
1204 set_intr_gate(2, &nmi); 1205 set_trap_gate(5, &bounds);
1205 set_system_intr_gate(3, &int3); /* int3/4 can be called from all */ 1206 set_trap_gate(6, &invalid_op);
1206 set_system_gate(4, &overflow); 1207 set_trap_gate(7, &device_not_available);
1207 set_trap_gate(5, &bounds); 1208 set_task_gate(8, GDT_ENTRY_DOUBLEFAULT_TSS);
1208 set_trap_gate(6, &invalid_op); 1209 set_trap_gate(9, &coprocessor_segment_overrun);
1209 set_trap_gate(7, &device_not_available);
1210 set_task_gate(8, GDT_ENTRY_DOUBLEFAULT_TSS);
1211 set_trap_gate(9, &coprocessor_segment_overrun);
1212 set_trap_gate(10, &invalid_TSS); 1210 set_trap_gate(10, &invalid_TSS);
1213 set_trap_gate(11, &segment_not_present); 1211 set_trap_gate(11, &segment_not_present);
1214 set_trap_gate(12, &stack_segment); 1212 set_trap_gate(12, &stack_segment);
diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c
index adff76ea97c4..74e992957ff6 100644
--- a/arch/x86/kernel/traps_64.c
+++ b/arch/x86/kernel/traps_64.c
@@ -10,49 +10,49 @@
10 * 'Traps.c' handles hardware traps and faults after we have saved some 10 * 'Traps.c' handles hardware traps and faults after we have saved some
11 * state in 'entry.S'. 11 * state in 'entry.S'.
12 */ 12 */
13#include <linux/sched.h> 13#include <linux/moduleparam.h>
14#include <linux/interrupt.h>
15#include <linux/kallsyms.h>
16#include <linux/spinlock.h>
17#include <linux/kprobes.h>
18#include <linux/uaccess.h>
19#include <linux/utsname.h>
20#include <linux/kdebug.h>
14#include <linux/kernel.h> 21#include <linux/kernel.h>
22#include <linux/module.h>
23#include <linux/ptrace.h>
15#include <linux/string.h> 24#include <linux/string.h>
25#include <linux/unwind.h>
26#include <linux/delay.h>
16#include <linux/errno.h> 27#include <linux/errno.h>
17#include <linux/ptrace.h> 28#include <linux/kexec.h>
29#include <linux/sched.h>
18#include <linux/timer.h> 30#include <linux/timer.h>
19#include <linux/mm.h>
20#include <linux/init.h> 31#include <linux/init.h>
21#include <linux/delay.h>
22#include <linux/spinlock.h>
23#include <linux/interrupt.h>
24#include <linux/kallsyms.h>
25#include <linux/module.h>
26#include <linux/moduleparam.h>
27#include <linux/nmi.h>
28#include <linux/kprobes.h>
29#include <linux/kexec.h>
30#include <linux/unwind.h>
31#include <linux/uaccess.h>
32#include <linux/bug.h> 32#include <linux/bug.h>
33#include <linux/kdebug.h> 33#include <linux/nmi.h>
34#include <linux/utsname.h> 34#include <linux/mm.h>
35
36#include <mach_traps.h>
37 35
38#if defined(CONFIG_EDAC) 36#if defined(CONFIG_EDAC)
39#include <linux/edac.h> 37#include <linux/edac.h>
40#endif 38#endif
41 39
42#include <asm/system.h> 40#include <asm/stacktrace.h>
43#include <asm/io.h> 41#include <asm/processor.h>
44#include <asm/atomic.h>
45#include <asm/debugreg.h> 42#include <asm/debugreg.h>
43#include <asm/atomic.h>
44#include <asm/system.h>
45#include <asm/unwind.h>
46#include <asm/desc.h> 46#include <asm/desc.h>
47#include <asm/i387.h> 47#include <asm/i387.h>
48#include <asm/processor.h> 48#include <asm/nmi.h>
49#include <asm/unwind.h>
50#include <asm/smp.h> 49#include <asm/smp.h>
50#include <asm/io.h>
51#include <asm/pgalloc.h> 51#include <asm/pgalloc.h>
52#include <asm/pda.h>
53#include <asm/proto.h> 52#include <asm/proto.h>
54#include <asm/nmi.h> 53#include <asm/pda.h>
55#include <asm/stacktrace.h> 54
55#include <mach_traps.h>
56 56
57asmlinkage void divide_error(void); 57asmlinkage void divide_error(void);
58asmlinkage void debug(void); 58asmlinkage void debug(void);
@@ -71,12 +71,15 @@ asmlinkage void general_protection(void);
71asmlinkage void page_fault(void); 71asmlinkage void page_fault(void);
72asmlinkage void coprocessor_error(void); 72asmlinkage void coprocessor_error(void);
73asmlinkage void simd_coprocessor_error(void); 73asmlinkage void simd_coprocessor_error(void);
74asmlinkage void reserved(void);
75asmlinkage void alignment_check(void); 74asmlinkage void alignment_check(void);
76asmlinkage void machine_check(void);
77asmlinkage void spurious_interrupt_bug(void); 75asmlinkage void spurious_interrupt_bug(void);
76asmlinkage void machine_check(void);
78 77
78int panic_on_unrecovered_nmi;
79int kstack_depth_to_print = 12;
79static unsigned int code_bytes = 64; 80static unsigned int code_bytes = 64;
81static int ignore_nmis;
82static int die_counter;
80 83
81static inline void conditional_sti(struct pt_regs *regs) 84static inline void conditional_sti(struct pt_regs *regs)
82{ 85{
@@ -100,8 +103,6 @@ static inline void preempt_conditional_cli(struct pt_regs *regs)
100 dec_preempt_count(); 103 dec_preempt_count();
101} 104}
102 105
103int kstack_depth_to_print = 12;
104
105void printk_address(unsigned long address, int reliable) 106void printk_address(unsigned long address, int reliable)
106{ 107{
107#ifdef CONFIG_KALLSYMS 108#ifdef CONFIG_KALLSYMS
@@ -204,8 +205,6 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
204 return NULL; 205 return NULL;
205} 206}
206 207
207#define MSG(txt) ops->warning(data, txt)
208
209/* 208/*
210 * x86-64 can have up to three kernel stacks: 209 * x86-64 can have up to three kernel stacks:
211 * process stack 210 * process stack
@@ -232,11 +231,11 @@ struct stack_frame {
232 unsigned long return_address; 231 unsigned long return_address;
233}; 232};
234 233
235 234static inline unsigned long
236static inline unsigned long print_context_stack(struct thread_info *tinfo, 235print_context_stack(struct thread_info *tinfo,
237 unsigned long *stack, unsigned long bp, 236 unsigned long *stack, unsigned long bp,
238 const struct stacktrace_ops *ops, void *data, 237 const struct stacktrace_ops *ops, void *data,
239 unsigned long *end) 238 unsigned long *end)
240{ 239{
241 struct stack_frame *frame = (struct stack_frame *)bp; 240 struct stack_frame *frame = (struct stack_frame *)bp;
242 241
@@ -258,7 +257,7 @@ static inline unsigned long print_context_stack(struct thread_info *tinfo,
258 return bp; 257 return bp;
259} 258}
260 259
261void dump_trace(struct task_struct *tsk, struct pt_regs *regs, 260void dump_trace(struct task_struct *task, struct pt_regs *regs,
262 unsigned long *stack, unsigned long bp, 261 unsigned long *stack, unsigned long bp,
263 const struct stacktrace_ops *ops, void *data) 262 const struct stacktrace_ops *ops, void *data)
264{ 263{
@@ -267,36 +266,34 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs,
267 unsigned used = 0; 266 unsigned used = 0;
268 struct thread_info *tinfo; 267 struct thread_info *tinfo;
269 268
270 if (!tsk) 269 if (!task)
271 tsk = current; 270 task = current;
272 tinfo = task_thread_info(tsk);
273 271
274 if (!stack) { 272 if (!stack) {
275 unsigned long dummy; 273 unsigned long dummy;
276 stack = &dummy; 274 stack = &dummy;
277 if (tsk && tsk != current) 275 if (task && task != current)
278 stack = (unsigned long *)tsk->thread.sp; 276 stack = (unsigned long *)task->thread.sp;
279 } 277 }
280 278
281#ifdef CONFIG_FRAME_POINTER 279#ifdef CONFIG_FRAME_POINTER
282 if (!bp) { 280 if (!bp) {
283 if (tsk == current) { 281 if (task == current) {
284 /* Grab bp right from our regs */ 282 /* Grab bp right from our regs */
285 asm("movq %%rbp, %0" : "=r" (bp):); 283 asm("movq %%rbp, %0" : "=r" (bp) :);
286 } else { 284 } else {
287 /* bp is the last reg pushed by switch_to */ 285 /* bp is the last reg pushed by switch_to */
288 bp = *(unsigned long *) tsk->thread.sp; 286 bp = *(unsigned long *) task->thread.sp;
289 } 287 }
290 } 288 }
291#endif 289#endif
292 290
293
294
295 /* 291 /*
296 * Print function call entries in all stacks, starting at the 292 * Print function call entries in all stacks, starting at the
297 * current stack address. If the stacks consist of nested 293 * current stack address. If the stacks consist of nested
298 * exceptions 294 * exceptions
299 */ 295 */
296 tinfo = task_thread_info(task);
300 for (;;) { 297 for (;;) {
301 char *id; 298 char *id;
302 unsigned long *estack_end; 299 unsigned long *estack_end;
@@ -381,18 +378,17 @@ static const struct stacktrace_ops print_trace_ops = {
381 .address = print_trace_address, 378 .address = print_trace_address,
382}; 379};
383 380
384void 381void show_trace(struct task_struct *task, struct pt_regs *regs,
385show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long *stack, 382 unsigned long *stack, unsigned long bp)
386 unsigned long bp)
387{ 383{
388 printk("\nCall Trace:\n"); 384 printk("\nCall Trace:\n");
389 dump_trace(tsk, regs, stack, bp, &print_trace_ops, NULL); 385 dump_trace(task, regs, stack, bp, &print_trace_ops, NULL);
390 printk("\n"); 386 printk("\n");
391} 387}
392 388
393static void 389static void
394_show_stack(struct task_struct *tsk, struct pt_regs *regs, unsigned long *sp, 390_show_stack(struct task_struct *task, struct pt_regs *regs,
395 unsigned long bp) 391 unsigned long *sp, unsigned long bp)
396{ 392{
397 unsigned long *stack; 393 unsigned long *stack;
398 int i; 394 int i;
@@ -404,14 +400,14 @@ _show_stack(struct task_struct *tsk, struct pt_regs *regs, unsigned long *sp,
404 // back trace for this cpu. 400 // back trace for this cpu.
405 401
406 if (sp == NULL) { 402 if (sp == NULL) {
407 if (tsk) 403 if (task)
408 sp = (unsigned long *)tsk->thread.sp; 404 sp = (unsigned long *)task->thread.sp;
409 else 405 else
410 sp = (unsigned long *)&sp; 406 sp = (unsigned long *)&sp;
411 } 407 }
412 408
413 stack = sp; 409 stack = sp;
414 for(i=0; i < kstack_depth_to_print; i++) { 410 for (i = 0; i < kstack_depth_to_print; i++) {
415 if (stack >= irqstack && stack <= irqstack_end) { 411 if (stack >= irqstack && stack <= irqstack_end) {
416 if (stack == irqstack_end) { 412 if (stack == irqstack_end) {
417 stack = (unsigned long *) (irqstack_end[-1]); 413 stack = (unsigned long *) (irqstack_end[-1]);
@@ -426,12 +422,12 @@ _show_stack(struct task_struct *tsk, struct pt_regs *regs, unsigned long *sp,
426 printk(" %016lx", *stack++); 422 printk(" %016lx", *stack++);
427 touch_nmi_watchdog(); 423 touch_nmi_watchdog();
428 } 424 }
429 show_trace(tsk, regs, sp, bp); 425 show_trace(task, regs, sp, bp);
430} 426}
431 427
432void show_stack(struct task_struct *tsk, unsigned long * sp) 428void show_stack(struct task_struct *task, unsigned long *sp)
433{ 429{
434 _show_stack(tsk, NULL, sp, 0); 430 _show_stack(task, NULL, sp, 0);
435} 431}
436 432
437/* 433/*
@@ -439,8 +435,8 @@ void show_stack(struct task_struct *tsk, unsigned long * sp)
439 */ 435 */
440void dump_stack(void) 436void dump_stack(void)
441{ 437{
442 unsigned long dummy;
443 unsigned long bp = 0; 438 unsigned long bp = 0;
439 unsigned long stack;
444 440
445#ifdef CONFIG_FRAME_POINTER 441#ifdef CONFIG_FRAME_POINTER
446 if (!bp) 442 if (!bp)
@@ -452,7 +448,7 @@ void dump_stack(void)
452 init_utsname()->release, 448 init_utsname()->release,
453 (int)strcspn(init_utsname()->version, " "), 449 (int)strcspn(init_utsname()->version, " "),
454 init_utsname()->version); 450 init_utsname()->version);
455 show_trace(NULL, NULL, &dummy, bp); 451 show_trace(NULL, NULL, &stack, bp);
456} 452}
457 453
458EXPORT_SYMBOL(dump_stack); 454EXPORT_SYMBOL(dump_stack);
@@ -463,12 +459,8 @@ void show_registers(struct pt_regs *regs)
463 unsigned long sp; 459 unsigned long sp;
464 const int cpu = smp_processor_id(); 460 const int cpu = smp_processor_id();
465 struct task_struct *cur = cpu_pda(cpu)->pcurrent; 461 struct task_struct *cur = cpu_pda(cpu)->pcurrent;
466 u8 *ip;
467 unsigned int code_prologue = code_bytes * 43 / 64;
468 unsigned int code_len = code_bytes;
469 462
470 sp = regs->sp; 463 sp = regs->sp;
471 ip = (u8 *) regs->ip - code_prologue;
472 printk("CPU %d ", cpu); 464 printk("CPU %d ", cpu);
473 __show_regs(regs); 465 __show_regs(regs);
474 printk("Process %s (pid: %d, threadinfo %p, task %p)\n", 466 printk("Process %s (pid: %d, threadinfo %p, task %p)\n",
@@ -479,15 +471,21 @@ void show_registers(struct pt_regs *regs)
479 * time of the fault.. 471 * time of the fault..
480 */ 472 */
481 if (!user_mode(regs)) { 473 if (!user_mode(regs)) {
474 unsigned int code_prologue = code_bytes * 43 / 64;
475 unsigned int code_len = code_bytes;
482 unsigned char c; 476 unsigned char c;
477 u8 *ip;
478
483 printk("Stack: "); 479 printk("Stack: ");
484 _show_stack(NULL, regs, (unsigned long *)sp, regs->bp); 480 _show_stack(NULL, regs, (unsigned long *)sp, regs->bp);
485 printk("\n"); 481 printk("\n");
486 482
487 printk(KERN_EMERG "Code: "); 483 printk(KERN_EMERG "Code: ");
484
485 ip = (u8 *)regs->ip - code_prologue;
488 if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) { 486 if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) {
489 /* try starting at RIP */ 487 /* try starting at RIP */
490 ip = (u8 *) regs->ip; 488 ip = (u8 *)regs->ip;
491 code_len = code_len - code_prologue + 1; 489 code_len = code_len - code_prologue + 1;
492 } 490 }
493 for (i = 0; i < code_len; i++, ip++) { 491 for (i = 0; i < code_len; i++, ip++) {
@@ -503,7 +501,7 @@ void show_registers(struct pt_regs *regs)
503 } 501 }
504 } 502 }
505 printk("\n"); 503 printk("\n");
506} 504}
507 505
508int is_valid_bugaddr(unsigned long ip) 506int is_valid_bugaddr(unsigned long ip)
509{ 507{
@@ -561,10 +559,9 @@ void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
561 do_exit(signr); 559 do_exit(signr);
562} 560}
563 561
564int __kprobes __die(const char * str, struct pt_regs * regs, long err) 562int __kprobes __die(const char *str, struct pt_regs *regs, long err)
565{ 563{
566 static int die_counter; 564 printk(KERN_EMERG "%s: %04lx [%u] ", str, err & 0xffff, ++die_counter);
567 printk(KERN_EMERG "%s: %04lx [%u] ", str, err & 0xffff,++die_counter);
568#ifdef CONFIG_PREEMPT 565#ifdef CONFIG_PREEMPT
569 printk("PREEMPT "); 566 printk("PREEMPT ");
570#endif 567#endif
@@ -575,8 +572,10 @@ int __kprobes __die(const char * str, struct pt_regs * regs, long err)
575 printk("DEBUG_PAGEALLOC"); 572 printk("DEBUG_PAGEALLOC");
576#endif 573#endif
577 printk("\n"); 574 printk("\n");
578 if (notify_die(DIE_OOPS, str, regs, err, current->thread.trap_no, SIGSEGV) == NOTIFY_STOP) 575 if (notify_die(DIE_OOPS, str, regs, err,
576 current->thread.trap_no, SIGSEGV) == NOTIFY_STOP)
579 return 1; 577 return 1;
578
580 show_registers(regs); 579 show_registers(regs);
581 add_taint(TAINT_DIE); 580 add_taint(TAINT_DIE);
582 /* Executive summary in case the oops scrolled away */ 581 /* Executive summary in case the oops scrolled away */
@@ -588,7 +587,7 @@ int __kprobes __die(const char * str, struct pt_regs * regs, long err)
588 return 0; 587 return 0;
589} 588}
590 589
591void die(const char * str, struct pt_regs * regs, long err) 590void die(const char *str, struct pt_regs *regs, long err)
592{ 591{
593 unsigned long flags = oops_begin(); 592 unsigned long flags = oops_begin();
594 593
@@ -605,8 +604,7 @@ die_nmi(char *str, struct pt_regs *regs, int do_panic)
605{ 604{
606 unsigned long flags; 605 unsigned long flags;
607 606
608 if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == 607 if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP)
609 NOTIFY_STOP)
610 return; 608 return;
611 609
612 flags = oops_begin(); 610 flags = oops_begin();
@@ -614,7 +612,9 @@ die_nmi(char *str, struct pt_regs *regs, int do_panic)
 614 * We are in trouble anyway, let's at least try 612 * We are in trouble anyway, let's at least try
615 * to get a message out. 613 * to get a message out.
616 */ 614 */
617 printk(str, smp_processor_id()); 615 printk(KERN_EMERG "%s", str);
616 printk(" on CPU%d, ip %08lx, registers:\n",
617 smp_processor_id(), regs->ip);
618 show_registers(regs); 618 show_registers(regs);
619 if (kexec_should_crash(current)) 619 if (kexec_should_crash(current))
620 crash_kexec(regs); 620 crash_kexec(regs);
@@ -626,44 +626,44 @@ die_nmi(char *str, struct pt_regs *regs, int do_panic)
626 do_exit(SIGBUS); 626 do_exit(SIGBUS);
627} 627}
628 628
-static void __kprobes do_trap(int trapnr, int signr, char *str,
-			      struct pt_regs * regs, long error_code,
-			      siginfo_t *info)
+static void __kprobes
+do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
+	long error_code, siginfo_t *info)
 {
 	struct task_struct *tsk = current;
 
-	if (user_mode(regs)) {
-		/*
-		 * We want error_code and trap_no set for userspace
-		 * faults and kernelspace faults which result in
-		 * die(), but not kernelspace faults which are fixed
-		 * up.  die() gives the process no chance to handle
-		 * the signal and notice the kernel fault information,
-		 * so that won't result in polluting the information
-		 * about previously queued, but not yet delivered,
-		 * faults.  See also do_general_protection below.
-		 */
-		tsk->thread.error_code = error_code;
-		tsk->thread.trap_no = trapnr;
-
-		if (show_unhandled_signals && unhandled_signal(tsk, signr) &&
-		    printk_ratelimit()) {
-			printk(KERN_INFO
-			       "%s[%d] trap %s ip:%lx sp:%lx error:%lx",
-			       tsk->comm, tsk->pid, str,
-			       regs->ip, regs->sp, error_code);
-			print_vma_addr(" in ", regs->ip);
-			printk("\n");
-		}
-
-		if (info)
-			force_sig_info(signr, info, tsk);
-		else
-			force_sig(signr, tsk);
-		return;
-	}
+	if (!user_mode(regs))
+		goto kernel_trap;
+
+	/*
+	 * We want error_code and trap_no set for userspace faults and
+	 * kernelspace faults which result in die(), but not
+	 * kernelspace faults which are fixed up.  die() gives the
+	 * process no chance to handle the signal and notice the
+	 * kernel fault information, so that won't result in polluting
+	 * the information about previously queued, but not yet
+	 * delivered, faults.  See also do_general_protection below.
+	 */
+	tsk->thread.error_code = error_code;
+	tsk->thread.trap_no = trapnr;
+
+	if (show_unhandled_signals && unhandled_signal(tsk, signr) &&
+	    printk_ratelimit()) {
+		printk(KERN_INFO
+		       "%s[%d] trap %s ip:%lx sp:%lx error:%lx",
+		       tsk->comm, tsk->pid, str,
+		       regs->ip, regs->sp, error_code);
+		print_vma_addr(" in ", regs->ip);
+		printk("\n");
+	}
+
+	if (info)
+		force_sig_info(signr, info, tsk);
+	else
+		force_sig(signr, tsk);
+	return;
 
-
+kernel_trap:
 	if (!fixup_exception(regs)) {
 		tsk->thread.error_code = error_code;
 		tsk->thread.trap_no = trapnr;
@@ -673,41 +673,39 @@ static void __kprobes do_trap(int trapnr, int signr, char *str,
 }
 
 #define DO_ERROR(trapnr, signr, str, name) \
 asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
 { \
 	if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
 	    == NOTIFY_STOP) \
 		return; \
 	conditional_sti(regs); \
 	do_trap(trapnr, signr, str, regs, error_code, NULL); \
 }
 
 #define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \
 asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
 { \
 	siginfo_t info; \
 	info.si_signo = signr; \
 	info.si_errno = 0; \
 	info.si_code = sicode; \
 	info.si_addr = (void __user *)siaddr; \
 	trace_hardirqs_fixup(); \
 	if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
 	    == NOTIFY_STOP) \
 		return; \
 	conditional_sti(regs); \
 	do_trap(trapnr, signr, str, regs, error_code, &info); \
 }
 
-DO_ERROR_INFO( 0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip)
-DO_ERROR( 4, SIGSEGV, "overflow", overflow)
-DO_ERROR( 5, SIGSEGV, "bounds", bounds)
-DO_ERROR_INFO( 6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip)
-DO_ERROR( 7, SIGSEGV, "device not available", device_not_available)
-DO_ERROR( 9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun)
+DO_ERROR_INFO(0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip)
+DO_ERROR(4, SIGSEGV, "overflow", overflow)
+DO_ERROR(5, SIGSEGV, "bounds", bounds)
+DO_ERROR_INFO(6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip)
+DO_ERROR(9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun)
 DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS)
 DO_ERROR(11, SIGBUS, "segment not present", segment_not_present)
 DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0)
-DO_ERROR(18, SIGSEGV, "reserved", reserved)
 
 /* Runs on IST stack */
 asmlinkage void do_stack_segment(struct pt_regs *regs, long error_code)
@@ -737,31 +735,34 @@ asmlinkage void do_double_fault(struct pt_regs * regs, long error_code)
 	die(str, regs, error_code);
 }
 
-asmlinkage void __kprobes do_general_protection(struct pt_regs * regs,
-						long error_code)
+asmlinkage void __kprobes
+do_general_protection(struct pt_regs *regs, long error_code)
 {
-	struct task_struct *tsk = current;
+	struct task_struct *tsk;
 
 	conditional_sti(regs);
 
-	if (user_mode(regs)) {
-		tsk->thread.error_code = error_code;
-		tsk->thread.trap_no = 13;
-
-		if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
-		    printk_ratelimit()) {
-			printk(KERN_INFO
-		       "%s[%d] general protection ip:%lx sp:%lx error:%lx",
-			       tsk->comm, tsk->pid,
-			       regs->ip, regs->sp, error_code);
-			print_vma_addr(" in ", regs->ip);
-			printk("\n");
-		}
+	tsk = current;
+	if (!user_mode(regs))
+		goto gp_in_kernel;
 
-		force_sig(SIGSEGV, tsk);
-		return;
-	}
+	tsk->thread.error_code = error_code;
+	tsk->thread.trap_no = 13;
+
+	if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
+	    printk_ratelimit()) {
+		printk(KERN_INFO
+			"%s[%d] general protection ip:%lx sp:%lx error:%lx",
+			tsk->comm, tsk->pid,
+			regs->ip, regs->sp, error_code);
+		print_vma_addr(" in ", regs->ip);
+		printk("\n");
+	}
+
+	force_sig(SIGSEGV, tsk);
+	return;
 
+gp_in_kernel:
 	if (fixup_exception(regs))
 		return;
 
@@ -774,14 +775,14 @@ asmlinkage void __kprobes do_general_protection(struct pt_regs * regs,
 }
 
 static notrace __kprobes void
-mem_parity_error(unsigned char reason, struct pt_regs * regs)
+mem_parity_error(unsigned char reason, struct pt_regs *regs)
 {
 	printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n",
 		reason);
 	printk(KERN_EMERG "You have some hardware problem, likely on the PCI bus.\n");
 
 #if defined(CONFIG_EDAC)
-	if(edac_handler_set()) {
+	if (edac_handler_set()) {
 		edac_atomic_assert_error();
 		return;
 	}
@@ -798,7 +799,7 @@ mem_parity_error(unsigned char reason, struct pt_regs * regs)
 }
 
 static notrace __kprobes void
-io_check_error(unsigned char reason, struct pt_regs * regs)
+io_check_error(unsigned char reason, struct pt_regs *regs)
 {
 	printk("NMI: IOCK error (debug interrupt?)\n");
 	show_registers(regs);
@@ -828,14 +829,14 @@ unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
 
 /* Runs on IST stack.  This code must keep interrupts off all the time.
    Nested NMIs are prevented by the CPU. */
 asmlinkage notrace __kprobes void default_do_nmi(struct pt_regs *regs)
 {
 	unsigned char reason = 0;
 	int cpu;
 
 	cpu = smp_processor_id();
 
 	/* Only the BSP gets external NMIs from the system. */
 	if (!cpu)
 		reason = get_nmi_reason();
 
@@ -847,32 +848,57 @@ asmlinkage notrace __kprobes void default_do_nmi(struct pt_regs *regs)
 		 * Ok, so this is none of the documented NMI sources,
 		 * so it must be the NMI watchdog.
 		 */
-		if (nmi_watchdog_tick(regs,reason))
+		if (nmi_watchdog_tick(regs, reason))
 			return;
-		if (!do_nmi_callback(regs,cpu))
+		if (!do_nmi_callback(regs, cpu))
 			unknown_nmi_error(reason, regs);
 
 		return;
 	}
 	if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
 		return;
 
 	/* AK: following checks seem to be broken on modern chipsets. FIXME */
-
 	if (reason & 0x80)
 		mem_parity_error(reason, regs);
 	if (reason & 0x40)
 		io_check_error(reason, regs);
 }
 
+asmlinkage notrace __kprobes void
+do_nmi(struct pt_regs *regs, long error_code)
+{
+	nmi_enter();
+
+	add_pda(__nmi_count, 1);
+
+	if (!ignore_nmis)
+		default_do_nmi(regs);
+
+	nmi_exit();
+}
+
+void stop_nmi(void)
+{
+	acpi_nmi_disable();
+	ignore_nmis++;
+}
+
+void restart_nmi(void)
+{
+	ignore_nmis--;
+	acpi_nmi_enable();
+}
+
 /* runs on IST stack. */
-asmlinkage void __kprobes do_int3(struct pt_regs * regs, long error_code)
+asmlinkage void __kprobes do_int3(struct pt_regs *regs, long error_code)
 {
 	trace_hardirqs_fixup();
 
-	if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) == NOTIFY_STOP) {
+	if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP)
+			== NOTIFY_STOP)
 		return;
-	}
+
 	preempt_conditional_sti(regs);
 	do_trap(3, SIGTRAP, "int3", regs, error_code, NULL);
 	preempt_conditional_cli(regs);
@@ -903,8 +929,8 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs)
 asmlinkage void __kprobes do_debug(struct pt_regs * regs,
 				   unsigned long error_code)
 {
-	unsigned long condition;
 	struct task_struct *tsk = current;
+	unsigned long condition;
 	siginfo_t info;
 
 	trace_hardirqs_fixup();
@@ -925,21 +951,19 @@ asmlinkage void __kprobes do_debug(struct pt_regs * regs,
 
 	/* Mask out spurious debug traps due to lazy DR7 setting */
 	if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) {
-		if (!tsk->thread.debugreg7) {
+		if (!tsk->thread.debugreg7)
 			goto clear_dr7;
-		}
 	}
 
 	tsk->thread.debugreg6 = condition;
 
-
 	/*
 	 * Single-stepping through TF: make sure we ignore any events in
 	 * kernel space (but re-enable TF when returning to user mode).
 	 */
 	if (condition & DR_STEP) {
 		if (!user_mode(regs))
 			goto clear_TF_reenable;
 	}
 
 	/* Ok, finally something we can handle */
@@ -952,7 +976,7 @@ asmlinkage void __kprobes do_debug(struct pt_regs * regs,
 	force_sig_info(SIGTRAP, &info, tsk);
 
 clear_dr7:
-	set_debugreg(0UL, 7);
+	set_debugreg(0, 7);
 	preempt_conditional_cli(regs);
 	return;
 
@@ -960,6 +984,7 @@ clear_TF_reenable:
 	set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
 	regs->flags &= ~X86_EFLAGS_TF;
 	preempt_conditional_cli(regs);
+	return;
 }
 
 static int kernel_math_error(struct pt_regs *regs, const char *str, int trapnr)
@@ -982,7 +1007,7 @@ static int kernel_math_error(struct pt_regs *regs, const char *str, int trapnr)
 asmlinkage void do_coprocessor_error(struct pt_regs *regs)
 {
 	void __user *ip = (void __user *)(regs->ip);
-	struct task_struct * task;
+	struct task_struct *task;
 	siginfo_t info;
 	unsigned short cwd, swd;
 
@@ -1015,30 +1040,30 @@ asmlinkage void do_coprocessor_error(struct pt_regs *regs)
 	cwd = get_fpu_cwd(task);
 	swd = get_fpu_swd(task);
 	switch (swd & ~cwd & 0x3f) {
-	case 0x000:
-	default:
+	case 0x000: /* No unmasked exception */
+	default: /* Multiple exceptions */
 		break;
 	case 0x001: /* Invalid Op */
 		/*
 		 * swd & 0x240 == 0x040: Stack Underflow
 		 * swd & 0x240 == 0x240: Stack Overflow
 		 * User must clear the SF bit (0x40) if set
 		 */
 		info.si_code = FPE_FLTINV;
 		break;
 	case 0x002: /* Denormalize */
 	case 0x010: /* Underflow */
 		info.si_code = FPE_FLTUND;
 		break;
 	case 0x004: /* Zero Divide */
 		info.si_code = FPE_FLTDIV;
 		break;
 	case 0x008: /* Overflow */
 		info.si_code = FPE_FLTOVF;
 		break;
 	case 0x020: /* Precision */
 		info.si_code = FPE_FLTRES;
 		break;
 	}
 	force_sig_info(SIGFPE, &info, task);
 }
@@ -1051,7 +1076,7 @@ asmlinkage void bad_intr(void)
 asmlinkage void do_simd_coprocessor_error(struct pt_regs *regs)
 {
 	void __user *ip = (void __user *)(regs->ip);
-	struct task_struct * task;
+	struct task_struct *task;
 	siginfo_t info;
 	unsigned short mxcsr;
 
@@ -1079,25 +1104,25 @@ asmlinkage void do_simd_coprocessor_error(struct pt_regs *regs)
 	 */
 	mxcsr = get_fpu_mxcsr(task);
 	switch (~((mxcsr & 0x1f80) >> 7) & (mxcsr & 0x3f)) {
 	case 0x000:
 	default:
 		break;
 	case 0x001: /* Invalid Op */
 		info.si_code = FPE_FLTINV;
 		break;
 	case 0x002: /* Denormalize */
 	case 0x010: /* Underflow */
 		info.si_code = FPE_FLTUND;
 		break;
 	case 0x004: /* Zero Divide */
 		info.si_code = FPE_FLTDIV;
 		break;
 	case 0x008: /* Overflow */
 		info.si_code = FPE_FLTOVF;
 		break;
 	case 0x020: /* Precision */
 		info.si_code = FPE_FLTRES;
 		break;
 	}
 	force_sig_info(SIGFPE, &info, task);
 }
@@ -1115,7 +1140,7 @@ asmlinkage void __attribute__((weak)) mce_threshold_interrupt(void)
 }
 
 /*
  * 'math_state_restore()' saves the current math information in the
  * old math state array, and gets the new ones from the current task
  *
  * Careful.. There are problems with IBM-designed IRQ13 behaviour.
@@ -1140,7 +1165,7 @@ asmlinkage void math_state_restore(void)
 		local_irq_disable();
 	}
 
 	clts();				/* Allow maths ops (or we recurse) */
 	restore_fpu_checking(&me->thread.xstate->fxsave);
 	task_thread_info(me)->status |= TS_USEDFPU;
 	me->fpu_counter++;
@@ -1149,64 +1174,61 @@ EXPORT_SYMBOL_GPL(math_state_restore)
 
 void __init trap_init(void)
 {
-	set_intr_gate(0,&divide_error);
-	set_intr_gate_ist(1,&debug,DEBUG_STACK);
-	set_intr_gate_ist(2,&nmi,NMI_STACK);
-	set_system_gate_ist(3,&int3,DEBUG_STACK); /* int3 can be called from all */
-	set_system_gate(4,&overflow); /* int4 can be called from all */
-	set_intr_gate(5,&bounds);
-	set_intr_gate(6,&invalid_op);
-	set_intr_gate(7,&device_not_available);
-	set_intr_gate_ist(8,&double_fault, DOUBLEFAULT_STACK);
-	set_intr_gate(9,&coprocessor_segment_overrun);
-	set_intr_gate(10,&invalid_TSS);
-	set_intr_gate(11,&segment_not_present);
-	set_intr_gate_ist(12,&stack_segment,STACKFAULT_STACK);
-	set_intr_gate(13,&general_protection);
-	set_intr_gate(14,&page_fault);
-	set_intr_gate(15,&spurious_interrupt_bug);
-	set_intr_gate(16,&coprocessor_error);
-	set_intr_gate(17,&alignment_check);
+	set_intr_gate(0, &divide_error);
+	set_intr_gate_ist(1, &debug, DEBUG_STACK);
+	set_intr_gate_ist(2, &nmi, NMI_STACK);
+	set_system_gate_ist(3, &int3, DEBUG_STACK); /* int3 can be called from all */
+	set_system_gate(4, &overflow); /* int4 can be called from all */
+	set_intr_gate(5, &bounds);
+	set_intr_gate(6, &invalid_op);
+	set_intr_gate(7, &device_not_available);
+	set_intr_gate_ist(8, &double_fault, DOUBLEFAULT_STACK);
+	set_intr_gate(9, &coprocessor_segment_overrun);
+	set_intr_gate(10, &invalid_TSS);
+	set_intr_gate(11, &segment_not_present);
+	set_intr_gate_ist(12, &stack_segment, STACKFAULT_STACK);
+	set_intr_gate(13, &general_protection);
+	set_intr_gate(14, &page_fault);
+	set_intr_gate(15, &spurious_interrupt_bug);
+	set_intr_gate(16, &coprocessor_error);
+	set_intr_gate(17, &alignment_check);
 #ifdef CONFIG_X86_MCE
-	set_intr_gate_ist(18,&machine_check, MCE_STACK);
+	set_intr_gate_ist(18, &machine_check, MCE_STACK);
 #endif
-	set_intr_gate(19,&simd_coprocessor_error);
+	set_intr_gate(19, &simd_coprocessor_error);
 
 #ifdef CONFIG_IA32_EMULATION
 	set_system_gate(IA32_SYSCALL_VECTOR, ia32_syscall);
 #endif
-
 	/*
 	 * initialize the per thread extended state:
 	 */
 	init_thread_xstate();
 	/*
-	 * Should be a barrier for any external CPU state.
+	 * Should be a barrier for any external CPU state:
	 */
 	cpu_init();
 }
 
-
 static int __init oops_setup(char *s)
 {
 	if (!s)
 		return -EINVAL;
 	if (!strcmp(s, "panic"))
 		panic_on_oops = 1;
 	return 0;
 }
 early_param("oops", oops_setup);
 
 static int __init kstack_setup(char *s)
 {
 	if (!s)
 		return -EINVAL;
-	kstack_depth_to_print = simple_strtoul(s,NULL,0);
+	kstack_depth_to_print = simple_strtoul(s, NULL, 0);
 	return 0;
 }
 early_param("kstack", kstack_setup);
 
-
 static int __init code_bytes_setup(char *s)
 {
 	code_bytes = simple_strtoul(s, NULL, 0);
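The traps_64.c hunks above are dominated by one reshaping: do_trap() and do_general_protection() un-nest their long user-mode bodies by branching to a kernel_trap:/gp_in_kernel: label for the kernel-mode case. A minimal standalone sketch of that de-nesting pattern (user_mode(), handle_user() and handle_kernel() below are illustrative stand-ins, not names from the patch):

    #include <stdio.h>

    struct pt_regs { int user; };

    static int user_mode(struct pt_regs *r)      { return r->user; }
    static void handle_user(struct pt_regs *r)   { (void)r; puts("user path"); }
    static void handle_kernel(struct pt_regs *r) { (void)r; puts("kernel path"); }

    static void handler(struct pt_regs *regs)
    {
        if (!user_mode(regs))
            goto kernel;

        handle_user(regs);      /* common case, one indent level shallower */
        return;

    kernel:
        handle_kernel(regs);    /* rare case, kept out of the main flow */
    }

    int main(void)
    {
        struct pt_regs user = { 1 }, kern = { 0 };

        handler(&user);
        handler(&kern);
        return 0;
    }

Behaviour is unchanged; only the nesting depth of the common case drops, which is what lets the bodies above shift left by one tab stop.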
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
new file mode 100644
index 000000000000..3c36f92160c9
--- /dev/null
+++ b/arch/x86/kernel/tsc.c
@@ -0,0 +1,533 @@
1#include <linux/kernel.h>
2#include <linux/sched.h>
3#include <linux/init.h>
4#include <linux/module.h>
5#include <linux/timer.h>
6#include <linux/acpi_pmtmr.h>
7#include <linux/cpufreq.h>
8#include <linux/dmi.h>
9#include <linux/delay.h>
10#include <linux/clocksource.h>
11#include <linux/percpu.h>
12
13#include <asm/hpet.h>
14#include <asm/timer.h>
15#include <asm/vgtod.h>
16#include <asm/time.h>
17#include <asm/delay.h>
18
19unsigned int cpu_khz; /* TSC clocks / usec, not used here */
20EXPORT_SYMBOL(cpu_khz);
21unsigned int tsc_khz;
22EXPORT_SYMBOL(tsc_khz);
23
24/*
25 * TSC can be unstable due to cpufreq or due to unsynced TSCs
26 */
27static int tsc_unstable;
28
29/* native_sched_clock() is called before tsc_init(), so
30 we must start with the TSC soft disabled to prevent
31 erroneous rdtsc usage on !cpu_has_tsc processors */
32static int tsc_disabled = -1;
33
34/*
35 * Scheduler clock - returns current time in nanosec units.
36 */
37u64 native_sched_clock(void)
38{
39 u64 this_offset;
40
41 /*
42 * Fall back to jiffies if there's no TSC available:
43 * ( But note that we still use it if the TSC is marked
44 * unstable. We do this because unlike Time Of Day,
45 * the scheduler clock tolerates small errors and it's
46 * very important for it to be as fast as the platform
 47 * can achieve it. )
48 */
49 if (unlikely(tsc_disabled)) {
50 /* No locking but a rare wrong value is not a big deal: */
51 return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ);
52 }
53
54 /* read the Time Stamp Counter: */
55 rdtscll(this_offset);
56
57 /* return the value in ns */
58 return cycles_2_ns(this_offset);
59}
60
61/* We need to define a real function for sched_clock, to override the
62 weak default version */
63#ifdef CONFIG_PARAVIRT
64unsigned long long sched_clock(void)
65{
66 return paravirt_sched_clock();
67}
68#else
69unsigned long long
70sched_clock(void) __attribute__((alias("native_sched_clock")));
71#endif
72
73int check_tsc_unstable(void)
74{
75 return tsc_unstable;
76}
77EXPORT_SYMBOL_GPL(check_tsc_unstable);
78
79#ifdef CONFIG_X86_TSC
80int __init notsc_setup(char *str)
81{
82 printk(KERN_WARNING "notsc: Kernel compiled with CONFIG_X86_TSC, "
83 "cannot disable TSC completely.\n");
84 tsc_disabled = 1;
85 return 1;
86}
87#else
88/*
89 * disable flag for tsc. Takes effect by clearing the TSC cpu flag
90 * in cpu/common.c
91 */
92int __init notsc_setup(char *str)
93{
94 setup_clear_cpu_cap(X86_FEATURE_TSC);
95 return 1;
96}
97#endif
98
99__setup("notsc", notsc_setup);
100
101#define MAX_RETRIES 5
102#define SMI_TRESHOLD 50000
103
104/*
105 * Read TSC and the reference counters. Take care of SMI disturbance
106 */
107static u64 __init tsc_read_refs(u64 *pm, u64 *hpet)
108{
109 u64 t1, t2;
110 int i;
111
112 for (i = 0; i < MAX_RETRIES; i++) {
113 t1 = get_cycles();
114 if (hpet)
115 *hpet = hpet_readl(HPET_COUNTER) & 0xFFFFFFFF;
116 else
117 *pm = acpi_pm_read_early();
118 t2 = get_cycles();
119 if ((t2 - t1) < SMI_TRESHOLD)
120 return t2;
121 }
122 return ULLONG_MAX;
123}
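tsc_read_refs() above guards each reference read with a cheap SMI detector: if the two surrounding TSC reads are further apart than SMI_TRESHOLD cycles, something (most likely an SMI) stole time in between, so the sample is discarded and retried. The same guard in isolation, as a sketch with fake clock sources standing in for rdtsc and the HPET/PM timer:

    #include <stdint.h>
    #include <stdio.h>

    #define MAX_RETRIES   5
    #define SMI_THRESHOLD 50000     /* cycles; spelled SMI_TRESHOLD above */

    static uint64_t fake_tsc;
    static uint64_t get_cycles(void) { return fake_tsc += 100; }  /* stand-in */
    static uint64_t read_ref(void)   { return fake_tsc; }         /* stand-in */

    static uint64_t read_ref_guarded(uint64_t *ref)
    {
        int i;

        for (i = 0; i < MAX_RETRIES; i++) {
            uint64_t t1 = get_cycles();

            *ref = read_ref();

            uint64_t t2 = get_cycles();

            /* Reference read completed quickly: no SMI slipped in */
            if (t2 - t1 < SMI_THRESHOLD)
                return t2;
        }
        return UINT64_MAX;      /* every sample was disturbed */
    }

    int main(void)
    {
        uint64_t ref;

        printf("tsc %llu\n", (unsigned long long)read_ref_guarded(&ref));
        return 0;
    }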
124
125/**
126 * native_calibrate_tsc - calibrate the tsc on boot
127 */
128unsigned long native_calibrate_tsc(void)
129{
130 unsigned long flags;
131 u64 tsc1, tsc2, tr1, tr2, delta, pm1, pm2, hpet1, hpet2;
132 int hpet = is_hpet_enabled();
133 unsigned int tsc_khz_val = 0;
134
135 local_irq_save(flags);
136
137 tsc1 = tsc_read_refs(&pm1, hpet ? &hpet1 : NULL);
138
139 outb((inb(0x61) & ~0x02) | 0x01, 0x61);
140
141 outb(0xb0, 0x43);
142 outb((CLOCK_TICK_RATE / (1000 / 50)) & 0xff, 0x42);
143 outb((CLOCK_TICK_RATE / (1000 / 50)) >> 8, 0x42);
144 tr1 = get_cycles();
145 while ((inb(0x61) & 0x20) == 0);
146 tr2 = get_cycles();
147
148 tsc2 = tsc_read_refs(&pm2, hpet ? &hpet2 : NULL);
149
150 local_irq_restore(flags);
151
152 /*
153 * Preset the result with the raw and inaccurate PIT
154 * calibration value
155 */
156 delta = (tr2 - tr1);
157 do_div(delta, 50);
158 tsc_khz_val = delta;
159
160 /* hpet or pmtimer available ? */
161 if (!hpet && !pm1 && !pm2) {
162 printk(KERN_INFO "TSC calibrated against PIT\n");
163 goto out;
164 }
165
166	/* Check whether the sampling was disturbed by an SMI */
167 if (tsc1 == ULLONG_MAX || tsc2 == ULLONG_MAX) {
168 printk(KERN_WARNING "TSC calibration disturbed by SMI, "
169 "using PIT calibration result\n");
170 goto out;
171 }
172
173 tsc2 = (tsc2 - tsc1) * 1000000LL;
174
175 if (hpet) {
176 printk(KERN_INFO "TSC calibrated against HPET\n");
177 if (hpet2 < hpet1)
178 hpet2 += 0x100000000ULL;
179 hpet2 -= hpet1;
180 tsc1 = ((u64)hpet2 * hpet_readl(HPET_PERIOD));
181 do_div(tsc1, 1000000);
182 } else {
183 printk(KERN_INFO "TSC calibrated against PM_TIMER\n");
184 if (pm2 < pm1)
185 pm2 += (u64)ACPI_PM_OVRRUN;
186 pm2 -= pm1;
187 tsc1 = pm2 * 1000000000LL;
188 do_div(tsc1, PMTMR_TICKS_PER_SEC);
189 }
190
191 do_div(tsc2, tsc1);
192 tsc_khz_val = tsc2;
193
194out:
195 return tsc_khz_val;
196}
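As a sanity check on the arithmetic in native_calibrate_tsc(): the PIT gate is 50 ms, so (tr2 - tr1) / 50 is cycles per millisecond, i.e. kHz; the HPET/PM-timer refinement then recomputes tsc_khz as tsc_cycles * 10^6 / elapsed_ns. A standalone check with illustrative round numbers (a nominal 2 GHz part, not values from the source):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        /* A 2.0 GHz TSC advances 1e8 cycles during the 50 ms PIT gate */
        uint64_t tr1 = 0, tr2 = 100000000ULL;
        uint64_t pit_khz = (tr2 - tr1) / 50;                /* 2000000 */

        /* Refinement: same cycle count over a reference interval that
         * the HPET/PM timer measured as 50 ms = 5e7 ns */
        uint64_t cycles = 100000000ULL;
        uint64_t ref_ns = 50000000ULL;
        uint64_t ref_khz = cycles * 1000000ULL / ref_ns;    /* 2000000 */

        printf("PIT: %llu kHz, refined: %llu kHz\n",
               (unsigned long long)pit_khz, (unsigned long long)ref_khz);
        return 0;
    }

Both paths agree on an undisturbed system; the refinement only matters because the raw PIT loop is quantized and SMI-prone.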
197
198
199#ifdef CONFIG_X86_32
200/* Only called from the Powernow K7 cpu freq driver */
201int recalibrate_cpu_khz(void)
202{
203#ifndef CONFIG_SMP
204 unsigned long cpu_khz_old = cpu_khz;
205
206 if (cpu_has_tsc) {
207 tsc_khz = calibrate_tsc();
208 cpu_khz = tsc_khz;
209 cpu_data(0).loops_per_jiffy =
210 cpufreq_scale(cpu_data(0).loops_per_jiffy,
211 cpu_khz_old, cpu_khz);
212 return 0;
213 } else
214 return -ENODEV;
215#else
216 return -ENODEV;
217#endif
218}
219
220EXPORT_SYMBOL(recalibrate_cpu_khz);
221
222#endif /* CONFIG_X86_32 */
223
224/* Accelerators for sched_clock()
225 * convert from cycles(64bits) => nanoseconds (64bits)
226 * basic equation:
227 * ns = cycles / (freq / ns_per_sec)
228 * ns = cycles * (ns_per_sec / freq)
229 * ns = cycles * (10^9 / (cpu_khz * 10^3))
230 * ns = cycles * (10^6 / cpu_khz)
231 *
232 * Then we use scaling math (suggested by george@mvista.com) to get:
233 * ns = cycles * (10^6 * SC / cpu_khz) / SC
234 * ns = cycles * cyc2ns_scale / SC
235 *
236 * And since SC is a constant power of two, we can convert the div
237 * into a shift.
238 *
239 * We can use khz divisor instead of mhz to keep a better precision, since
240 * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits.
241 * (mathieu.desnoyers@polymtl.ca)
242 *
243 * -johnstul@us.ibm.com "math is hard, lets go shopping!"
244 */
245
246DEFINE_PER_CPU(unsigned long, cyc2ns);
247
248static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
249{
250 unsigned long long tsc_now, ns_now;
251 unsigned long flags, *scale;
252
253 local_irq_save(flags);
254 sched_clock_idle_sleep_event();
255
256 scale = &per_cpu(cyc2ns, cpu);
257
258 rdtscll(tsc_now);
259 ns_now = __cycles_2_ns(tsc_now);
260
261 if (cpu_khz)
262 *scale = (NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR)/cpu_khz;
263
264 sched_clock_idle_wakeup_event(0);
265 local_irq_restore(flags);
266}
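The scaling-math comment above boils down to two lines: scale = (10^6 << SC) / cpu_khz once per frequency change, then ns = (cycles * scale) >> SC on every clock read. A standalone check of that identity (CYC2NS_SCALE_FACTOR assumed to be 10, matching the 2^10 bound the comment mentions):

    #include <stdint.h>
    #include <stdio.h>

    #define CYC2NS_SCALE_FACTOR 10      /* assumed, per the comment's 2^10 */

    int main(void)
    {
        unsigned long cpu_khz = 2000000;    /* 2.0 GHz */
        unsigned long scale = (1000000UL << CYC2NS_SCALE_FACTOR) / cpu_khz;
        uint64_t cycles = 2000000000ULL;    /* one second's worth */

        /* 2e9 cycles at 2 GHz -> 1,000,000,000 ns */
        printf("%llu ns\n",
               (unsigned long long)((cycles * scale) >> CYC2NS_SCALE_FACTOR));
        return 0;
    }

The multiply-and-shift replaces a 64-bit division on the sched_clock() fast path, at the cost of the small rounding error the comment alludes to.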
267
268#ifdef CONFIG_CPU_FREQ
269
270/* Frequency scaling support. Adjust the TSC based timer when the cpu frequency
271 * changes.
272 *
273 * RED-PEN: On SMP we assume all CPUs run with the same frequency. It's
274 * not that important because current Opteron setups do not support
275 * scaling on SMP anyway.
276 *
277 * Should fix up last_tsc too. Currently gettimeofday in the
278 * first tick after the change will be slightly wrong.
279 */
280
281static unsigned int ref_freq;
282static unsigned long loops_per_jiffy_ref;
283static unsigned long tsc_khz_ref;
284
285static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
286 void *data)
287{
288 struct cpufreq_freqs *freq = data;
289 unsigned long *lpj, dummy;
290
291 if (cpu_has(&cpu_data(freq->cpu), X86_FEATURE_CONSTANT_TSC))
292 return 0;
293
294 lpj = &dummy;
295 if (!(freq->flags & CPUFREQ_CONST_LOOPS))
296#ifdef CONFIG_SMP
297 lpj = &cpu_data(freq->cpu).loops_per_jiffy;
298#else
299 lpj = &boot_cpu_data.loops_per_jiffy;
300#endif
301
302 if (!ref_freq) {
303 ref_freq = freq->old;
304 loops_per_jiffy_ref = *lpj;
305 tsc_khz_ref = tsc_khz;
306 }
307 if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) ||
308 (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) ||
309 (val == CPUFREQ_RESUMECHANGE)) {
310 *lpj = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new);
311
312 tsc_khz = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new);
313 if (!(freq->flags & CPUFREQ_CONST_LOOPS))
314 mark_tsc_unstable("cpufreq changes");
315 }
316
317 set_cyc2ns_scale(tsc_khz_ref, freq->cpu);
318
319 return 0;
320}
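The notifier above leans on cpufreq_scale() for both loops_per_jiffy and tsc_khz; as used here it is a plain ratio rescale, value * new_freq / ref_freq. A hedged sketch of that arithmetic (the helper below is illustrative, not the kernel's implementation):

    #include <stdint.h>
    #include <stdio.h>

    /* Assumed semantics of cpufreq_scale(old, ref, new): rescale a
     * frequency-proportional value from ref kHz to new kHz. */
    static unsigned long scale(unsigned long old, unsigned int ref,
                               unsigned int new_freq)
    {
        return (unsigned long)((uint64_t)old * new_freq / ref);
    }

    int main(void)
    {
        /* tsc_khz_ref = 2,000,000 at a 2,000,000 kHz reference;
         * stepping down to 1,000,000 kHz halves it */
        printf("%lu kHz\n", scale(2000000UL, 2000000, 1000000));
        return 0;
    }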
321
322static struct notifier_block time_cpufreq_notifier_block = {
323 .notifier_call = time_cpufreq_notifier
324};
325
326static int __init cpufreq_tsc(void)
327{
328 cpufreq_register_notifier(&time_cpufreq_notifier_block,
329 CPUFREQ_TRANSITION_NOTIFIER);
330 return 0;
331}
332
333core_initcall(cpufreq_tsc);
334
335#endif /* CONFIG_CPU_FREQ */
336
337/* clocksource code */
338
339static struct clocksource clocksource_tsc;
340
341/*
342 * We compare the TSC to the cycle_last value in the clocksource
343 * structure to avoid a nasty time-warp. This can be observed in a
344 * very small window right after one CPU updated cycle_last under
345 * xtime/vsyscall_gtod lock and the other CPU reads a TSC value which
346 * is smaller than the cycle_last reference value due to a TSC which
347 * is slightly behind. This delta is nowhere else observable, but in
348 * that case it results in a forward time jump in the range of hours
349 * due to the unsigned delta calculation of the time keeping core
350 * code, which is necessary to support wrapping clocksources like pm
351 * timer.
352 */
353static cycle_t read_tsc(void)
354{
355 cycle_t ret = (cycle_t)get_cycles();
356
357 return ret >= clocksource_tsc.cycle_last ?
358 ret : clocksource_tsc.cycle_last;
359}
360
361static cycle_t __vsyscall_fn vread_tsc(void)
362{
363 cycle_t ret = (cycle_t)vget_cycles();
364
365 return ret >= __vsyscall_gtod_data.clock.cycle_last ?
366 ret : __vsyscall_gtod_data.clock.cycle_last;
367}
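The cycle_last comparison that the time-warp comment describes is a monotonic clamp: never hand the timekeeping core a value behind its last reference, because the unsigned (now - cycle_last) delta would otherwise wrap to a huge positive number. A minimal illustration of the failure mode and the fix:

    #include <stdint.h>
    #include <stdio.h>

    static uint64_t clamped(uint64_t now, uint64_t cycle_last)
    {
        return now >= cycle_last ? now : cycle_last;
    }

    int main(void)
    {
        uint64_t cycle_last = 1000;

        /* A TSC two cycles behind: the raw delta wraps to ~2^64 */
        printf("raw delta:     %llu\n",
               (unsigned long long)(998 - cycle_last));

        /* The clamp turns it into a harmless zero-length step */
        printf("clamped delta: %llu\n",
               (unsigned long long)(clamped(998, cycle_last) - cycle_last));
        return 0;
    }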
368
369static struct clocksource clocksource_tsc = {
370 .name = "tsc",
371 .rating = 300,
372 .read = read_tsc,
373 .mask = CLOCKSOURCE_MASK(64),
374 .shift = 22,
375 .flags = CLOCK_SOURCE_IS_CONTINUOUS |
376 CLOCK_SOURCE_MUST_VERIFY,
377#ifdef CONFIG_X86_64
378 .vread = vread_tsc,
379#endif
380};
381
382void mark_tsc_unstable(char *reason)
383{
384 if (!tsc_unstable) {
385 tsc_unstable = 1;
386 printk("Marking TSC unstable due to %s\n", reason);
387 /* Change only the rating, when not registered */
388 if (clocksource_tsc.mult)
389 clocksource_change_rating(&clocksource_tsc, 0);
390 else
391 clocksource_tsc.rating = 0;
392 }
393}
394
395EXPORT_SYMBOL_GPL(mark_tsc_unstable);
396
397static int __init dmi_mark_tsc_unstable(const struct dmi_system_id *d)
398{
399 printk(KERN_NOTICE "%s detected: marking TSC unstable.\n",
400 d->ident);
401 tsc_unstable = 1;
402 return 0;
403}
404
405/* List of systems that have known TSC problems */
406static struct dmi_system_id __initdata bad_tsc_dmi_table[] = {
407 {
408 .callback = dmi_mark_tsc_unstable,
409 .ident = "IBM Thinkpad 380XD",
410 .matches = {
411 DMI_MATCH(DMI_BOARD_VENDOR, "IBM"),
412 DMI_MATCH(DMI_BOARD_NAME, "2635FA0"),
413 },
414 },
415 {}
416};
417
418/*
419 * Geode_LX - the OLPC CPU has a possibly very reliable TSC
420 */
421#ifdef CONFIG_MGEODE_LX
422/* RTSC counts during suspend */
423#define RTSC_SUSP 0x100
424
425static void __init check_geode_tsc_reliable(void)
426{
427 unsigned long res_low, res_high;
428
429 rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high);
430 if (res_low & RTSC_SUSP)
431 clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY;
432}
433#else
434static inline void check_geode_tsc_reliable(void) { }
435#endif
436
437/*
438 * Make an educated guess if the TSC is trustworthy and synchronized
439 * over all CPUs.
440 */
441__cpuinit int unsynchronized_tsc(void)
442{
443 if (!cpu_has_tsc || tsc_unstable)
444 return 1;
445
446#ifdef CONFIG_SMP
447 if (apic_is_clustered_box())
448 return 1;
449#endif
450
451 if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
452 return 0;
453 /*
454 * Intel systems are normally all synchronized.
455 * Exceptions must mark TSC as unstable:
456 */
457 if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) {
458 /* assume multi socket systems are not synchronized: */
459 if (num_possible_cpus() > 1)
460 tsc_unstable = 1;
461 }
462
463 return tsc_unstable;
464}
465
466static void __init init_tsc_clocksource(void)
467{
468 clocksource_tsc.mult = clocksource_khz2mult(tsc_khz,
469 clocksource_tsc.shift);
470	/* lower the rating if we already know it's unstable: */
471 if (check_tsc_unstable()) {
472 clocksource_tsc.rating = 0;
473 clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS;
474 }
475 clocksource_register(&clocksource_tsc);
476}
477
478void __init tsc_init(void)
479{
480 u64 lpj;
481 int cpu;
482
483 if (!cpu_has_tsc)
484 return;
485
486 tsc_khz = calibrate_tsc();
487 cpu_khz = tsc_khz;
488
489 if (!tsc_khz) {
490 mark_tsc_unstable("could not calculate TSC khz");
491 return;
492 }
493
494#ifdef CONFIG_X86_64
495 if (cpu_has(&boot_cpu_data, X86_FEATURE_CONSTANT_TSC) &&
496 (boot_cpu_data.x86_vendor == X86_VENDOR_AMD))
497 cpu_khz = calibrate_cpu();
498#endif
499
500 lpj = ((u64)tsc_khz * 1000);
501 do_div(lpj, HZ);
502 lpj_fine = lpj;
503
504 printk("Detected %lu.%03lu MHz processor.\n",
505 (unsigned long)cpu_khz / 1000,
506 (unsigned long)cpu_khz % 1000);
507
508 /*
509 * Secondary CPUs do not run through tsc_init(), so set up
510 * all the scale factors for all CPUs, assuming the same
511 * speed as the bootup CPU. (cpufreq notifiers will fix this
512 * up if their speed diverges)
513 */
514 for_each_possible_cpu(cpu)
515 set_cyc2ns_scale(cpu_khz, cpu);
516
517 if (tsc_disabled > 0)
518 return;
519
520 /* now allow native_sched_clock() to use rdtsc */
521 tsc_disabled = 0;
522
523 use_tsc_delay();
524 /* Check and install the TSC clocksource */
525 dmi_check_system(bad_tsc_dmi_table);
526
527 if (unsynchronized_tsc())
528 mark_tsc_unstable("TSCs unsynchronized");
529
530 check_geode_tsc_reliable();
531 init_tsc_clocksource();
532}
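tsc_init() seeds lpj_fine with TSC cycles per jiffy, which is just tsc_khz * 1000 / HZ. A quick standalone check of that line (illustrative 2 GHz part, HZ assumed to be 250):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t tsc_khz = 2000000;     /* 2.0 GHz */
        uint64_t hz = 250;              /* assumed CONFIG_HZ */
        uint64_t lpj = tsc_khz * 1000 / hz;

        printf("%llu cycles per jiffy\n", (unsigned long long)lpj); /* 8000000 */
        return 0;
    }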
533
diff --git a/arch/x86/kernel/tsc_32.c b/arch/x86/kernel/tsc_32.c
deleted file mode 100644
index 65b70637ad97..000000000000
--- a/arch/x86/kernel/tsc_32.c
+++ /dev/null
@@ -1,451 +0,0 @@
1#include <linux/sched.h>
2#include <linux/clocksource.h>
3#include <linux/workqueue.h>
4#include <linux/cpufreq.h>
5#include <linux/jiffies.h>
6#include <linux/init.h>
7#include <linux/dmi.h>
8#include <linux/percpu.h>
9
10#include <asm/delay.h>
11#include <asm/tsc.h>
12#include <asm/io.h>
13#include <asm/timer.h>
14
15#include "mach_timer.h"
16
17/* native_sched_clock() is called before tsc_init(), so
18 we must start with the TSC soft disabled to prevent
19 erroneous rdtsc usage on !cpu_has_tsc processors */
20static int tsc_disabled = -1;
21
22/*
23 * On some systems the TSC frequency does not
24 * change with the cpu frequency. So we need
25 * an extra value to store the TSC freq
26 */
27unsigned int tsc_khz;
28EXPORT_SYMBOL_GPL(tsc_khz);
29
30#ifdef CONFIG_X86_TSC
31static int __init tsc_setup(char *str)
32{
33 printk(KERN_WARNING "notsc: Kernel compiled with CONFIG_X86_TSC, "
34 "cannot disable TSC completely.\n");
35 tsc_disabled = 1;
36 return 1;
37}
38#else
39/*
40 * disable flag for tsc. Takes effect by clearing the TSC cpu flag
41 * in cpu/common.c
42 */
43static int __init tsc_setup(char *str)
44{
45 setup_clear_cpu_cap(X86_FEATURE_TSC);
46 return 1;
47}
48#endif
49
50__setup("notsc", tsc_setup);
51
52/*
53 * code to mark and check if the TSC is unstable
54 * due to cpufreq or due to unsynced TSCs
55 */
56static int tsc_unstable;
57
58int check_tsc_unstable(void)
59{
60 return tsc_unstable;
61}
62EXPORT_SYMBOL_GPL(check_tsc_unstable);
63
64/* Accelerators for sched_clock()
65 * convert from cycles(64bits) => nanoseconds (64bits)
66 * basic equation:
67 * ns = cycles / (freq / ns_per_sec)
68 * ns = cycles * (ns_per_sec / freq)
69 * ns = cycles * (10^9 / (cpu_khz * 10^3))
70 * ns = cycles * (10^6 / cpu_khz)
71 *
72 * Then we use scaling math (suggested by george@mvista.com) to get:
73 * ns = cycles * (10^6 * SC / cpu_khz) / SC
74 * ns = cycles * cyc2ns_scale / SC
75 *
76 * And since SC is a constant power of two, we can convert the div
77 * into a shift.
78 *
79 * We can use khz divisor instead of mhz to keep a better precision, since
80 * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits.
81 * (mathieu.desnoyers@polymtl.ca)
82 *
83 * -johnstul@us.ibm.com "math is hard, lets go shopping!"
84 */
85
86DEFINE_PER_CPU(unsigned long, cyc2ns);
87
88static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
89{
90 unsigned long long tsc_now, ns_now;
91 unsigned long flags, *scale;
92
93 local_irq_save(flags);
94 sched_clock_idle_sleep_event();
95
96 scale = &per_cpu(cyc2ns, cpu);
97
98 rdtscll(tsc_now);
99 ns_now = __cycles_2_ns(tsc_now);
100
101 if (cpu_khz)
102 *scale = (NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR)/cpu_khz;
103
104 /*
105 * Start smoothly with the new frequency:
106 */
107 sched_clock_idle_wakeup_event(0);
108 local_irq_restore(flags);
109}
110
111/*
112 * Scheduler clock - returns current time in nanosec units.
113 */
114unsigned long long native_sched_clock(void)
115{
116 unsigned long long this_offset;
117
118 /*
119 * Fall back to jiffies if there's no TSC available:
120 * ( But note that we still use it if the TSC is marked
121 * unstable. We do this because unlike Time Of Day,
122 * the scheduler clock tolerates small errors and it's
123 * very important for it to be as fast as the platform
124 * can achieve it. )
125 */
126 if (unlikely(tsc_disabled))
127 /* No locking but a rare wrong value is not a big deal: */
128 return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ);
129
130 /* read the Time Stamp Counter: */
131 rdtscll(this_offset);
132
133 /* return the value in ns */
134 return cycles_2_ns(this_offset);
135}
136
137/* We need to define a real function for sched_clock, to override the
138 weak default version */
139#ifdef CONFIG_PARAVIRT
140unsigned long long sched_clock(void)
141{
142 return paravirt_sched_clock();
143}
144#else
145unsigned long long sched_clock(void)
146 __attribute__((alias("native_sched_clock")));
147#endif
148
149unsigned long native_calculate_cpu_khz(void)
150{
151 unsigned long long start, end;
152 unsigned long count;
153 u64 delta64 = (u64)ULLONG_MAX;
154 int i;
155 unsigned long flags;
156
157 local_irq_save(flags);
158
159 /* run 3 times to ensure the cache is warm and to get an accurate reading */
160 for (i = 0; i < 3; i++) {
161 mach_prepare_counter();
162 rdtscll(start);
163 mach_countup(&count);
164 rdtscll(end);
165
166 /*
167 * Error: ECTCNEVERSET
168 * The CTC wasn't reliable: we got a hit on the very first read,
169 * or the CPU was so fast/slow that the quotient wouldn't fit in
170 * 32 bits..
171 */
172 if (count <= 1)
173 continue;
174
175 /* cpu freq too slow: */
176 if ((end - start) <= CALIBRATE_TIME_MSEC)
177 continue;
178
179 /*
180 * We want the minimum time of all runs in case one of them
181 * is inaccurate due to SMI or other delay
182 */
183 delta64 = min(delta64, (end - start));
184 }
185
186 /* cpu freq too fast (or every run was bad): */
187 if (delta64 > (1ULL<<32))
188 goto err;
189
190 delta64 += CALIBRATE_TIME_MSEC/2; /* round for do_div */
191 do_div(delta64,CALIBRATE_TIME_MSEC);
192
193 local_irq_restore(flags);
194 return (unsigned long)delta64;
195err:
196 local_irq_restore(flags);
197 return 0;
198}
199
200int recalibrate_cpu_khz(void)
201{
202#ifndef CONFIG_SMP
203 unsigned long cpu_khz_old = cpu_khz;
204
205 if (cpu_has_tsc) {
206 cpu_khz = calculate_cpu_khz();
207 tsc_khz = cpu_khz;
208 cpu_data(0).loops_per_jiffy =
209 cpufreq_scale(cpu_data(0).loops_per_jiffy,
210 cpu_khz_old, cpu_khz);
211 return 0;
212 } else
213 return -ENODEV;
214#else
215 return -ENODEV;
216#endif
217}
218
219EXPORT_SYMBOL(recalibrate_cpu_khz);
220
221#ifdef CONFIG_CPU_FREQ
222
223/*
224 * if the CPU frequency is scaled, TSC-based delays will need a different
225 * loops_per_jiffy value to function properly.
226 */
227static unsigned int ref_freq;
228static unsigned long loops_per_jiffy_ref;
229static unsigned long cpu_khz_ref;
230
231static int
232time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, void *data)
233{
234 struct cpufreq_freqs *freq = data;
235
236 if (!ref_freq) {
237 if (!freq->old){
238 ref_freq = freq->new;
239 return 0;
240 }
241 ref_freq = freq->old;
242 loops_per_jiffy_ref = cpu_data(freq->cpu).loops_per_jiffy;
243 cpu_khz_ref = cpu_khz;
244 }
245
246 if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) ||
247 (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) ||
248 (val == CPUFREQ_RESUMECHANGE)) {
249 if (!(freq->flags & CPUFREQ_CONST_LOOPS))
250 cpu_data(freq->cpu).loops_per_jiffy =
251 cpufreq_scale(loops_per_jiffy_ref,
252 ref_freq, freq->new);
253
254 if (cpu_khz) {
255
256 if (num_online_cpus() == 1)
257 cpu_khz = cpufreq_scale(cpu_khz_ref,
258 ref_freq, freq->new);
259 if (!(freq->flags & CPUFREQ_CONST_LOOPS)) {
260 tsc_khz = cpu_khz;
261 set_cyc2ns_scale(cpu_khz, freq->cpu);
262 /*
263 * TSC based sched_clock turns
264 * to junk w/ cpufreq
265 */
266 mark_tsc_unstable("cpufreq changes");
267 }
268 }
269 }
270
271 return 0;
272}
273
274static struct notifier_block time_cpufreq_notifier_block = {
275 .notifier_call = time_cpufreq_notifier
276};
277
278static int __init cpufreq_tsc(void)
279{
280 return cpufreq_register_notifier(&time_cpufreq_notifier_block,
281 CPUFREQ_TRANSITION_NOTIFIER);
282}
283core_initcall(cpufreq_tsc);
284
285#endif
286
287/* clock source code */
288
289static unsigned long current_tsc_khz;
290static struct clocksource clocksource_tsc;
291
292/*
293 * We compare the TSC to the cycle_last value in the clocksource
294 * structure to avoid a nasty time-warp issue. This can be observed in
295 * a very small window right after one CPU updated cycle_last under
296 * xtime lock and the other CPU reads a TSC value which is smaller
297 * than the cycle_last reference value due to a TSC which is slightly
298 * behind. This delta is nowhere else observable, but in that case it
299 * results in a forward time jump in the range of hours due to the
300 * unsigned delta calculation of the time keeping core code, which is
301 * necessary to support wrapping clocksources like pm timer.
302 */
303static cycle_t read_tsc(void)
304{
305 cycle_t ret;
306
307 rdtscll(ret);
308
309 return ret >= clocksource_tsc.cycle_last ?
310 ret : clocksource_tsc.cycle_last;
311}
312
313static struct clocksource clocksource_tsc = {
314 .name = "tsc",
315 .rating = 300,
316 .read = read_tsc,
317 .mask = CLOCKSOURCE_MASK(64),
318 .mult = 0, /* to be set */
319 .shift = 22,
320 .flags = CLOCK_SOURCE_IS_CONTINUOUS |
321 CLOCK_SOURCE_MUST_VERIFY,
322};
323
324void mark_tsc_unstable(char *reason)
325{
326 if (!tsc_unstable) {
327 tsc_unstable = 1;
328 printk("Marking TSC unstable due to: %s.\n", reason);
329 /* Can be called before registration */
330 if (clocksource_tsc.mult)
331 clocksource_change_rating(&clocksource_tsc, 0);
332 else
333 clocksource_tsc.rating = 0;
334 }
335}
336EXPORT_SYMBOL_GPL(mark_tsc_unstable);
337
338static int __init dmi_mark_tsc_unstable(const struct dmi_system_id *d)
339{
340 printk(KERN_NOTICE "%s detected: marking TSC unstable.\n",
341 d->ident);
342 tsc_unstable = 1;
343 return 0;
344}
345
346/* List of systems that have known TSC problems */
347static struct dmi_system_id __initdata bad_tsc_dmi_table[] = {
348 {
349 .callback = dmi_mark_tsc_unstable,
350 .ident = "IBM Thinkpad 380XD",
351 .matches = {
352 DMI_MATCH(DMI_BOARD_VENDOR, "IBM"),
353 DMI_MATCH(DMI_BOARD_NAME, "2635FA0"),
354 },
355 },
356 {}
357};
358
359/*
360 * Make an educated guess if the TSC is trustworthy and synchronized
361 * over all CPUs.
362 */
363__cpuinit int unsynchronized_tsc(void)
364{
365 if (!cpu_has_tsc || tsc_unstable)
366 return 1;
367
368 /* Anything with constant TSC should be synchronized */
369 if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
370 return 0;
371
372 /*
373 * Intel systems are normally all synchronized.
374 * Exceptions must mark TSC as unstable:
375 */
376 if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) {
377 /* assume multi socket systems are not synchronized: */
378 if (num_possible_cpus() > 1)
379 tsc_unstable = 1;
380 }
381 return tsc_unstable;
382}
383
384/*
385 * Geode_LX - the OLPC CPU has a possibly very reliable TSC
386 */
387#ifdef CONFIG_MGEODE_LX
388/* RTSC counts during suspend */
389#define RTSC_SUSP 0x100
390
391static void __init check_geode_tsc_reliable(void)
392{
393 unsigned long res_low, res_high;
394
395 rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high);
396 if (res_low & RTSC_SUSP)
397 clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY;
398}
399#else
400static inline void check_geode_tsc_reliable(void) { }
401#endif
402
403
404void __init tsc_init(void)
405{
406 int cpu;
407
408 if (!cpu_has_tsc || tsc_disabled > 0)
409 return;
410
411 cpu_khz = calculate_cpu_khz();
412 tsc_khz = cpu_khz;
413
414 if (!cpu_khz) {
415 mark_tsc_unstable("could not calculate TSC khz");
416 return;
417 }
418
419 /* now allow native_sched_clock() to use rdtsc */
420 tsc_disabled = 0;
421
422 printk("Detected %lu.%03lu MHz processor.\n",
423 (unsigned long)cpu_khz / 1000,
424 (unsigned long)cpu_khz % 1000);
425
426 /*
427 * Secondary CPUs do not run through tsc_init(), so set up
428 * all the scale factors for all CPUs, assuming the same
429 * speed as the bootup CPU. (cpufreq notifiers will fix this
430 * up if their speed diverges)
431 */
432 for_each_possible_cpu(cpu)
433 set_cyc2ns_scale(cpu_khz, cpu);
434
435 use_tsc_delay();
436
437 /* Check and install the TSC clocksource */
438 dmi_check_system(bad_tsc_dmi_table);
439
440 unsynchronized_tsc();
441 check_geode_tsc_reliable();
442 current_tsc_khz = tsc_khz;
443 clocksource_tsc.mult = clocksource_khz2mult(current_tsc_khz,
444 clocksource_tsc.shift);
445	/* lower the rating if we already know it's unstable: */
446 if (check_tsc_unstable()) {
447 clocksource_tsc.rating = 0;
448 clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS;
449 }
450 clocksource_register(&clocksource_tsc);
451}
diff --git a/arch/x86/kernel/tsc_64.c b/arch/x86/kernel/tsc_64.c
deleted file mode 100644
index 1784b8077a12..000000000000
--- a/arch/x86/kernel/tsc_64.c
+++ /dev/null
@@ -1,357 +0,0 @@
1#include <linux/kernel.h>
2#include <linux/sched.h>
3#include <linux/interrupt.h>
4#include <linux/init.h>
5#include <linux/clocksource.h>
6#include <linux/time.h>
7#include <linux/acpi.h>
8#include <linux/cpufreq.h>
9#include <linux/acpi_pmtmr.h>
10
11#include <asm/hpet.h>
12#include <asm/timex.h>
13#include <asm/timer.h>
14#include <asm/vgtod.h>
15
16static int notsc __initdata = 0;
17
18unsigned int cpu_khz; /* TSC clocks / usec, not used here */
19EXPORT_SYMBOL(cpu_khz);
20unsigned int tsc_khz;
21EXPORT_SYMBOL(tsc_khz);
22
23/* Accelerators for sched_clock()
24 * convert from cycles(64bits) => nanoseconds (64bits)
25 * basic equation:
26 * ns = cycles / (freq / ns_per_sec)
27 * ns = cycles * (ns_per_sec / freq)
28 * ns = cycles * (10^9 / (cpu_khz * 10^3))
29 * ns = cycles * (10^6 / cpu_khz)
30 *
31 * Then we use scaling math (suggested by george@mvista.com) to get:
32 * ns = cycles * (10^6 * SC / cpu_khz) / SC
33 * ns = cycles * cyc2ns_scale / SC
34 *
35 * And since SC is a constant power of two, we can convert the div
36 * into a shift.
37 *
38 * We can use khz divisor instead of mhz to keep a better precision, since
39 * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits.
40 * (mathieu.desnoyers@polymtl.ca)
41 *
42 * -johnstul@us.ibm.com "math is hard, lets go shopping!"
43 */
44DEFINE_PER_CPU(unsigned long, cyc2ns);
45
46static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
47{
48 unsigned long long tsc_now, ns_now;
49 unsigned long flags, *scale;
50
51 local_irq_save(flags);
52 sched_clock_idle_sleep_event();
53
54 scale = &per_cpu(cyc2ns, cpu);
55
56 rdtscll(tsc_now);
57 ns_now = __cycles_2_ns(tsc_now);
58
59 if (cpu_khz)
60 *scale = (NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR)/cpu_khz;
61
62 sched_clock_idle_wakeup_event(0);
63 local_irq_restore(flags);
64}
65
66unsigned long long native_sched_clock(void)
67{
68 unsigned long a = 0;
69
70 /* Could do CPU core sync here. Opteron can execute rdtsc speculatively,
71 * which means it is not completely exact and may not be monotonous
72 * between CPUs. But the errors should be too small to matter for
73 * scheduling purposes.
74 */
75
76 rdtscll(a);
77 return cycles_2_ns(a);
78}
79
80/* We need to define a real function for sched_clock, to override the
81 weak default version */
82#ifdef CONFIG_PARAVIRT
83unsigned long long sched_clock(void)
84{
85 return paravirt_sched_clock();
86}
87#else
88unsigned long long
89sched_clock(void) __attribute__((alias("native_sched_clock")));
90#endif
91
92
93static int tsc_unstable;
94
95int check_tsc_unstable(void)
96{
97 return tsc_unstable;
98}
99EXPORT_SYMBOL_GPL(check_tsc_unstable);
100
101#ifdef CONFIG_CPU_FREQ
102
103/* Frequency scaling support. Adjust the TSC based timer when the cpu frequency
104 * changes.
105 *
106 * RED-PEN: On SMP we assume all CPUs run with the same frequency. It's
107 * not that important because current Opteron setups do not support
108 * scaling on SMP anyway.
109 *
110 * Should fix up last_tsc too. Currently gettimeofday in the
111 * first tick after the change will be slightly wrong.
112 */
113
114static unsigned int ref_freq;
115static unsigned long loops_per_jiffy_ref;
116static unsigned long tsc_khz_ref;
117
118static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
119 void *data)
120{
121 struct cpufreq_freqs *freq = data;
122 unsigned long *lpj, dummy;
123
124 if (cpu_has(&cpu_data(freq->cpu), X86_FEATURE_CONSTANT_TSC))
125 return 0;
126
127 lpj = &dummy;
128 if (!(freq->flags & CPUFREQ_CONST_LOOPS))
129#ifdef CONFIG_SMP
130 lpj = &cpu_data(freq->cpu).loops_per_jiffy;
131#else
132 lpj = &boot_cpu_data.loops_per_jiffy;
133#endif
134
135 if (!ref_freq) {
136 ref_freq = freq->old;
137 loops_per_jiffy_ref = *lpj;
138 tsc_khz_ref = tsc_khz;
139 }
140 if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) ||
141 (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) ||
142 (val == CPUFREQ_RESUMECHANGE)) {
143 *lpj =
144 cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new);
145
146 tsc_khz = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new);
147 if (!(freq->flags & CPUFREQ_CONST_LOOPS))
148 mark_tsc_unstable("cpufreq changes");
149 }
150
151 set_cyc2ns_scale(tsc_khz_ref, freq->cpu);
152
153 return 0;
154}
155
156static struct notifier_block time_cpufreq_notifier_block = {
157 .notifier_call = time_cpufreq_notifier
158};
159
160static int __init cpufreq_tsc(void)
161{
162 cpufreq_register_notifier(&time_cpufreq_notifier_block,
163 CPUFREQ_TRANSITION_NOTIFIER);
164 return 0;
165}
166
167core_initcall(cpufreq_tsc);
168
169#endif
170
171#define MAX_RETRIES 5
172#define SMI_TRESHOLD 50000
173
174/*
175 * Read TSC and the reference counters. Take care of SMI disturbance
176 */
177static unsigned long __init tsc_read_refs(unsigned long *pm,
178 unsigned long *hpet)
179{
180 unsigned long t1, t2;
181 int i;
182
183 for (i = 0; i < MAX_RETRIES; i++) {
184 t1 = get_cycles();
185 if (hpet)
186 *hpet = hpet_readl(HPET_COUNTER) & 0xFFFFFFFF;
187 else
188 *pm = acpi_pm_read_early();
189 t2 = get_cycles();
190 if ((t2 - t1) < SMI_TRESHOLD)
191 return t2;
192 }
193 return ULONG_MAX;
194}
195
196/**
197 * tsc_calibrate - calibrate the tsc on boot
198 */
199void __init tsc_calibrate(void)
200{
201 unsigned long flags, tsc1, tsc2, tr1, tr2, pm1, pm2, hpet1, hpet2;
202 int hpet = is_hpet_enabled(), cpu;
203
204 local_irq_save(flags);
205
206 tsc1 = tsc_read_refs(&pm1, hpet ? &hpet1 : NULL);
207
208 outb((inb(0x61) & ~0x02) | 0x01, 0x61);
209
210 outb(0xb0, 0x43);
211 outb((CLOCK_TICK_RATE / (1000 / 50)) & 0xff, 0x42);
212 outb((CLOCK_TICK_RATE / (1000 / 50)) >> 8, 0x42);
213 tr1 = get_cycles();
214 while ((inb(0x61) & 0x20) == 0);
215 tr2 = get_cycles();
216
217 tsc2 = tsc_read_refs(&pm2, hpet ? &hpet2 : NULL);
218
219 local_irq_restore(flags);
220
221 /*
222 * Preset the result with the raw and inaccurate PIT
223 * calibration value
224 */
225 tsc_khz = (tr2 - tr1) / 50;
226
227 /* hpet or pmtimer available ? */
228 if (!hpet && !pm1 && !pm2) {
229 printk(KERN_INFO "TSC calibrated against PIT\n");
230 goto out;
231 }
232
233	/* Check whether the sampling was disturbed by an SMI */
234 if (tsc1 == ULONG_MAX || tsc2 == ULONG_MAX) {
235 printk(KERN_WARNING "TSC calibration disturbed by SMI, "
236 "using PIT calibration result\n");
237 goto out;
238 }
239
240 tsc2 = (tsc2 - tsc1) * 1000000L;
241
242 if (hpet) {
243 printk(KERN_INFO "TSC calibrated against HPET\n");
244 if (hpet2 < hpet1)
245 hpet2 += 0x100000000;
246 hpet2 -= hpet1;
247 tsc1 = (hpet2 * hpet_readl(HPET_PERIOD)) / 1000000;
248 } else {
249 printk(KERN_INFO "TSC calibrated against PM_TIMER\n");
250 if (pm2 < pm1)
251 pm2 += ACPI_PM_OVRRUN;
252 pm2 -= pm1;
253 tsc1 = (pm2 * 1000000000) / PMTMR_TICKS_PER_SEC;
254 }
255
256 tsc_khz = tsc2 / tsc1;
257
258out:
259 for_each_possible_cpu(cpu)
260 set_cyc2ns_scale(tsc_khz, cpu);
261}
262
263/*
264 * Make an educated guess if the TSC is trustworthy and synchronized
265 * over all CPUs.
266 */
267__cpuinit int unsynchronized_tsc(void)
268{
269 if (tsc_unstable)
270 return 1;
271
272#ifdef CONFIG_SMP
273 if (apic_is_clustered_box())
274 return 1;
275#endif
276
277 if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
278 return 0;
279
280 /* Assume multi socket systems are not synchronized */
281 return num_present_cpus() > 1;
282}
283
284int __init notsc_setup(char *s)
285{
286 notsc = 1;
287 return 1;
288}
289
290__setup("notsc", notsc_setup);
291
292static struct clocksource clocksource_tsc;
293
294/*
295 * We compare the TSC to the cycle_last value in the clocksource
296 * structure to avoid a nasty time-warp. This can be observed in a
297 * very small window right after one CPU updated cycle_last under
298 * xtime/vsyscall_gtod lock and the other CPU reads a TSC value which
299 * is smaller than the cycle_last reference value due to a TSC which
300 * is slightly behind. This delta is nowhere else observable, but in
301 * that case it results in a forward time jump in the range of hours
302 * due to the unsigned delta calculation of the time keeping core
303 * code, which is necessary to support wrapping clocksources like pm
304 * timer.
305 */
306static cycle_t read_tsc(void)
307{
308 cycle_t ret = (cycle_t)get_cycles();
309
310 return ret >= clocksource_tsc.cycle_last ?
311 ret : clocksource_tsc.cycle_last;
312}
313
314static cycle_t __vsyscall_fn vread_tsc(void)
315{
316 cycle_t ret = (cycle_t)vget_cycles();
317
318 return ret >= __vsyscall_gtod_data.clock.cycle_last ?
319 ret : __vsyscall_gtod_data.clock.cycle_last;
320}
321
322static struct clocksource clocksource_tsc = {
323 .name = "tsc",
324 .rating = 300,
325 .read = read_tsc,
326 .mask = CLOCKSOURCE_MASK(64),
327 .shift = 22,
328 .flags = CLOCK_SOURCE_IS_CONTINUOUS |
329 CLOCK_SOURCE_MUST_VERIFY,
330 .vread = vread_tsc,
331};
332
333void mark_tsc_unstable(char *reason)
334{
335 if (!tsc_unstable) {
336 tsc_unstable = 1;
337 printk("Marking TSC unstable due to %s\n", reason);
338 /* Change only the rating, when not registered */
339 if (clocksource_tsc.mult)
340 clocksource_change_rating(&clocksource_tsc, 0);
341 else
342 clocksource_tsc.rating = 0;
343 }
344}
345EXPORT_SYMBOL_GPL(mark_tsc_unstable);
346
347void __init init_tsc_clocksource(void)
348{
349 if (!notsc) {
350 clocksource_tsc.mult = clocksource_khz2mult(tsc_khz,
351 clocksource_tsc.shift);
352 if (check_tsc_unstable())
353 clocksource_tsc.rating = 0;
354
355 clocksource_register(&clocksource_tsc);
356 }
357}
diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c
new file mode 100644
index 000000000000..e94bdb6add1d
--- /dev/null
+++ b/arch/x86/kernel/visws_quirks.c
@@ -0,0 +1,709 @@
1/*
2 * SGI Visual Workstation support and quirks, unmaintained.
3 *
4 * Split out from setup.c by davej@suse.de
5 *
6 * Copyright (C) 1999 Bent Hagemark, Ingo Molnar
7 *
8 * SGI Visual Workstation interrupt controller
9 *
10 * The Cobalt system ASIC in the Visual Workstation contains a "Cobalt" APIC
11 * which serves as the main interrupt controller in the system. Non-legacy
12 * hardware in the system uses this controller directly. Legacy devices
13 * are connected to the PIIX4, which in turn has its 8259(s) connected to
14 * one of the Cobalt APIC entries.
15 *
16 * 09/02/2000 - Updated for 2.4 by jbarnes@sgi.com
17 *
18 * 25/11/2002 - Updated for 2.5 by Andrey Panin <pazke@orbita1.ru>
19 */
20#include <linux/interrupt.h>
21#include <linux/module.h>
22#include <linux/init.h>
23#include <linux/smp.h>
24
25#include <asm/visws/cobalt.h>
26#include <asm/visws/piix4.h>
27#include <asm/arch_hooks.h>
28#include <asm/fixmap.h>
29#include <asm/reboot.h>
30#include <asm/setup.h>
31#include <asm/e820.h>
32#include <asm/smp.h>
33#include <asm/io.h>
34
35#include <mach_ipi.h>
36
37#include "mach_apic.h"
38
39#include <linux/init.h>
40#include <linux/smp.h>
41
42#include <linux/kernel_stat.h>
43#include <linux/interrupt.h>
44#include <linux/init.h>
45
46#include <asm/io.h>
47#include <asm/apic.h>
48#include <asm/i8259.h>
49#include <asm/irq_vectors.h>
50#include <asm/visws/cobalt.h>
51#include <asm/visws/lithium.h>
52#include <asm/visws/piix4.h>
53
54#include <linux/sched.h>
55#include <linux/kernel.h>
56#include <linux/init.h>
57#include <linux/pci.h>
58#include <linux/pci_ids.h>
59
60extern int no_broadcast;
61
62#include <asm/io.h>
63#include <asm/apic.h>
64#include <asm/arch_hooks.h>
65#include <asm/visws/cobalt.h>
66#include <asm/visws/lithium.h>
67
68char visws_board_type = -1;
69char visws_board_rev = -1;
70
71int is_visws_box(void)
72{
73 return visws_board_type >= 0;
74}
75
76static int __init visws_time_init_quirk(void)
77{
78 printk(KERN_INFO "Starting Cobalt Timer system clock\n");
79
80 /* Set the countdown value */
81 co_cpu_write(CO_CPU_TIMEVAL, CO_TIME_HZ/HZ);
82
83 /* Start the timer */
84 co_cpu_write(CO_CPU_CTRL, co_cpu_read(CO_CPU_CTRL) | CO_CTRL_TIMERUN);
85
86 /* Enable (unmask) the timer interrupt */
87 co_cpu_write(CO_CPU_CTRL, co_cpu_read(CO_CPU_CTRL) & ~CO_CTRL_TIMEMASK);
88
89 /*
90 * Zero return means the generic timer setup code will set up
91 * the standard vector:
92 */
93 return 0;
94}
95
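(Worked example, assuming CO_TIME_HZ is the 100 MHz Cobalt timebase --
100000000 in the <asm/visws/cobalt.h> of this era -- and HZ = 100: the
countdown programmed above is CO_TIME_HZ / HZ = 1000000 ticks, so the
Cobalt timer fires every 10 ms, once per kernel tick.)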
96static int __init visws_pre_intr_init_quirk(void)
97{
98 init_VISWS_APIC_irqs();
99
100 /*
101 * We don't want ISA IRQs to be set up by the generic code:
102 */
103 return 1;
104}
105
106/* Quirk for machine specific memory setup. */
107
108#define MB (1024 * 1024)
109
110unsigned long sgivwfb_mem_phys;
111unsigned long sgivwfb_mem_size;
112EXPORT_SYMBOL(sgivwfb_mem_phys);
113EXPORT_SYMBOL(sgivwfb_mem_size);
114
115long long mem_size __initdata = 0;
116
117static char * __init visws_memory_setup_quirk(void)
118{
119 long long gfx_mem_size = 8 * MB;
120
121 mem_size = boot_params.alt_mem_k;
122
123 if (!mem_size) {
124 printk(KERN_WARNING "Bootloader didn't set memory size, upgrade it!\n");
125 mem_size = 128 * MB;
126 }
127
128 /*
129 * This hardcodes the graphics memory to 8 MB;
130 * it really should be sized dynamically (or at
131 * least set as a boot parameter).
132 */
133 if (!sgivwfb_mem_size) {
134 printk(KERN_WARNING "Defaulting to 8 MB framebuffer size\n");
135 sgivwfb_mem_size = 8 * MB;
136 }
137
138 /*
139 * Round the framebuffer size down to a whole MB
140 */
141 sgivwfb_mem_size &= ~((1 << 20) - 1);
142 sgivwfb_mem_phys = mem_size - gfx_mem_size;
143
144 e820_add_region(0, LOWMEMSIZE(), E820_RAM);
145 e820_add_region(HIGH_MEMORY, mem_size - sgivwfb_mem_size - HIGH_MEMORY, E820_RAM);
146 e820_add_region(sgivwfb_mem_phys, sgivwfb_mem_size, E820_RESERVED);
147
148 return "PROM";
149}
150
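(Worked example of the resulting map, assuming the 128 MB fallback, the
default 8 MB framebuffer, and HIGH_MEMORY's conventional 1 MB; LOWMEMSIZE()
is whatever the setup headers define for the sub-640K region:

	0      .. LOWMEMSIZE()   E820_RAM        low memory
	1 MB   .. 120 MB         E820_RAM        mem_size - 8 MB - 1 MB long
	120 MB .. 128 MB         E820_RESERVED   sgivwfb framebuffer

sgivwfb_mem_phys = 128 MB - 8 MB = 120 MB, so the framebuffer is carved
out of the top of RAM.)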
151static void visws_machine_emergency_restart(void)
152{
153 /*
154 * Visual Workstations restart after this
155 * register is poked on the PIIX4
156 */
157 outb(PIIX4_RESET_VAL, PIIX4_RESET_PORT);
158}
159
160static void visws_machine_power_off(void)
161{
162 unsigned short pm_status;
163/* extern unsigned int pci_bus0; */
164
165 while ((pm_status = inw(PMSTS_PORT)) & 0x100)
166 outw(pm_status, PMSTS_PORT);
167
168 outw(PM_SUSPEND_ENABLE, PMCNTRL_PORT);
169
170 mdelay(10);
171
172#define PCI_CONF1_ADDRESS(bus, devfn, reg) \
173 (0x80000000 | (bus << 16) | (devfn << 8) | (reg & ~3))
174
175/* outl(PCI_CONF1_ADDRESS(pci_bus0, SPECIAL_DEV, SPECIAL_REG), 0xCF8); */
176 outl(PIIX_SPECIAL_STOP, 0xCFC);
177}
178
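(Illustration of the PCI_CONF1_ADDRESS() encoding above, for a hypothetical
bus 0, device 18, function 0, register 0x40:

	devfn = (18 << 3) | 0 = 0x90
	PCI_CONF1_ADDRESS(0, 0x90, 0x40)
		= 0x80000000 | (0 << 16) | (0x90 << 8) | 0x40
		= 0x80009040

Writing that value to port 0xCF8 selects the register; the dword access at
0xCFC -- like the PIIX_SPECIAL_STOP write above -- then reaches it.)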
179static int __init visws_get_smp_config_quirk(unsigned int early)
180{
181 /*
182 * Prevent MP-table parsing by the generic code:
183 */
184 return 1;
185}
186
187extern unsigned int __cpuinitdata maxcpus;
188
189/*
190 * The Visual Workstation is Intel MP compliant in the hardware
191 * sense, but it doesn't have a BIOS-provided MP configuration table.
192 * That is no problem for Linux.
193 */
194
195static void __init MP_processor_info (struct mpc_config_processor *m)
196{
197 int ver, logical_apicid;
198 physid_mask_t apic_cpus;
199
200 if (!(m->mpc_cpuflag & CPU_ENABLED))
201 return;
202
203 logical_apicid = m->mpc_apicid;
204 printk(KERN_INFO "%sCPU #%d %u:%u APIC version %d\n",
205 m->mpc_cpuflag & CPU_BOOTPROCESSOR ? "Bootup " : "",
206 m->mpc_apicid,
207 (m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8,
208 (m->mpc_cpufeature & CPU_MODEL_MASK) >> 4,
209 m->mpc_apicver);
210
211 if (m->mpc_cpuflag & CPU_BOOTPROCESSOR)
212 boot_cpu_physical_apicid = m->mpc_apicid;
213
214 ver = m->mpc_apicver;
215 if ((ver >= 0x14 && m->mpc_apicid >= 0xff) || m->mpc_apicid >= 0xf) {
216 printk(KERN_ERR "Processor #%d INVALID. (Max ID: %d).\n",
217 m->mpc_apicid, MAX_APICS);
218 return;
219 }
220
221 apic_cpus = apicid_to_cpu_present(m->mpc_apicid);
222 physids_or(phys_cpu_present_map, phys_cpu_present_map, apic_cpus);
223 /*
224 * Validate version
225 */
226 if (ver == 0x0) {
227 printk(KERN_ERR "BIOS bug, APIC version is 0 for CPU#%d! "
228 "fixing up to 0x10. (tell your hw vendor)\n",
229 m->mpc_apicid);
230 ver = 0x10;
231 }
232 apic_version[m->mpc_apicid] = ver;
233}
234
235int __init visws_find_smp_config_quirk(unsigned int reserve)
236{
237 struct mpc_config_processor *mp = phys_to_virt(CO_CPU_TAB_PHYS);
238 unsigned short ncpus = readw(phys_to_virt(CO_CPU_NUM_PHYS));
239
240 if (ncpus > CO_CPU_MAX) {
241 printk(KERN_WARNING "find_visws_smp: got cpu count of %d at %p\n",
242 ncpus, mp);
243
244 ncpus = CO_CPU_MAX;
245 }
246
247 if (ncpus > maxcpus)
248 ncpus = maxcpus;
249
250#ifdef CONFIG_X86_LOCAL_APIC
251 smp_found_config = 1;
252#endif
253 while (ncpus--)
254 MP_processor_info(mp++);
255
256 mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
257
258 return 1;
259}
260
261extern int visws_trap_init_quirk(void);
262
263void __init visws_early_detect(void)
264{
265 int raw;
266
267 visws_board_type = (char)(inb_p(PIIX_GPI_BD_REG) & PIIX_GPI_BD_REG)
268 >> PIIX_GPI_BD_SHIFT;
269
270 if (visws_board_type < 0)
271 return;
272
273 /*
274 * Install special quirks for timer, interrupt and memory setup:
275 */
276 arch_time_init_quirk = visws_time_init_quirk;
277 arch_pre_intr_init_quirk = visws_pre_intr_init_quirk;
278 arch_memory_setup_quirk = visws_memory_setup_quirk;
279
280 /*
281 * Fall back to generic behavior for traps:
282 */
283 arch_intr_init_quirk = NULL;
284 arch_trap_init_quirk = visws_trap_init_quirk;
285
286 /*
287 * Install reboot quirks:
288 */
289 pm_power_off = visws_machine_power_off;
290 machine_ops.emergency_restart = visws_machine_emergency_restart;
291
292 /*
293 * Do not use broadcast IPIs:
294 */
295 no_broadcast = 0;
296
297 /*
298 * Override generic MP-table parsing:
299 */
300 mach_get_smp_config_quirk = visws_get_smp_config_quirk;
301 mach_find_smp_config_quirk = visws_find_smp_config_quirk;
302
303#ifdef CONFIG_X86_IO_APIC
304 /*
305 * Turn off IO-APIC detection and initialization:
306 */
307 skip_ioapic_setup = 1;
308#endif
309
310 /*
311 * Get the board rev.
312 * First, we have to initialize the 307 part to give us access
313 * to the GPIO registers. Let's map them at 0x0fc0, which is right
314 * after the PIIX4 PM section.
315 */
316 outb_p(SIO_DEV_SEL, SIO_INDEX);
317 outb_p(SIO_GP_DEV, SIO_DATA); /* Talk to GPIO regs. */
318
319 outb_p(SIO_DEV_MSB, SIO_INDEX);
320 outb_p(SIO_GP_MSB, SIO_DATA); /* MSB of GPIO base address */
321
322 outb_p(SIO_DEV_LSB, SIO_INDEX);
323 outb_p(SIO_GP_LSB, SIO_DATA); /* LSB of GPIO base address */
324
325 outb_p(SIO_DEV_ENB, SIO_INDEX);
326 outb_p(1, SIO_DATA); /* Enable GPIO registers. */
327
328 /*
329 * Now, we have to map the power management section to write
330 * a bit which enables access to the GPIO registers.
331 * What lunatic came up with this shit?
332 */
333 outb_p(SIO_DEV_SEL, SIO_INDEX);
334 outb_p(SIO_PM_DEV, SIO_DATA); /* Talk to GPIO regs. */
335
336 outb_p(SIO_DEV_MSB, SIO_INDEX);
337 outb_p(SIO_PM_MSB, SIO_DATA); /* MSB of PM base address */
338
339 outb_p(SIO_DEV_LSB, SIO_INDEX);
340 outb_p(SIO_PM_LSB, SIO_DATA); /* LSB of PM base address */
341
342 outb_p(SIO_DEV_ENB, SIO_INDEX);
343 outb_p(1, SIO_DATA); /* Enable PM registers. */
344
345 /*
346 * Now, write the PM register which enables the GPIO registers.
347 */
348 outb_p(SIO_PM_FER2, SIO_PM_INDEX);
349 outb_p(SIO_PM_GP_EN, SIO_PM_DATA);
350
351 /*
352 * Now, initialize the GPIO registers.
353 * We want them all to be inputs, which is the
354 * power-on default, so we leave them alone
355 * and just read the board rev!
356 */
357 raw = inb_p(SIO_GP_DATA1);
358 raw &= 0x7f; /* 7 bits of valid board revision ID. */
359
360 if (visws_board_type == VISWS_320) {
361 if (raw < 0x6) {
362 visws_board_rev = 4;
363 } else if (raw < 0xc) {
364 visws_board_rev = 5;
365 } else {
366 visws_board_rev = 6;
367 }
368 } else if (visws_board_type == VISWS_540) {
369 visws_board_rev = 2;
370 } else {
371 visws_board_rev = raw;
372 }
373
374 printk(KERN_INFO "Silicon Graphics Visual Workstation %s (rev %d) detected\n",
375 (visws_board_type == VISWS_320 ? "320" :
376 (visws_board_type == VISWS_540 ? "540" :
377 "unknown")), visws_board_rev);
378}
379
380#define A01234 (LI_INTA_0 | LI_INTA_1 | LI_INTA_2 | LI_INTA_3 | LI_INTA_4)
381#define BCD (LI_INTB | LI_INTC | LI_INTD)
382#define ALLDEVS (A01234 | BCD)
383
384static __init void lithium_init(void)
385{
386 set_fixmap(FIX_LI_PCIA, LI_PCI_A_PHYS);
387 set_fixmap(FIX_LI_PCIB, LI_PCI_B_PHYS);
388
389 if ((li_pcia_read16(PCI_VENDOR_ID) != PCI_VENDOR_ID_SGI) ||
390 (li_pcia_read16(PCI_DEVICE_ID) != PCI_DEVICE_ID_SGI_LITHIUM)) {
391 printk(KERN_EMERG "Lithium hostbridge %c not found\n", 'A');
392/* panic("This machine is not SGI Visual Workstation 320/540"); */
393 }
394
395 if ((li_pcib_read16(PCI_VENDOR_ID) != PCI_VENDOR_ID_SGI) ||
396 (li_pcib_read16(PCI_DEVICE_ID) != PCI_DEVICE_ID_SGI_LITHIUM)) {
397 printk(KERN_EMERG "Lithium hostbridge %c not found\n", 'B');
398/* panic("This machine is not SGI Visual Workstation 320/540"); */
399 }
400
401 li_pcia_write16(LI_PCI_INTEN, ALLDEVS);
402 li_pcib_write16(LI_PCI_INTEN, ALLDEVS);
403}
404
405static __init void cobalt_init(void)
406{
407 /*
408 * On a normal SMP PC this is used only with SMP, but here we
409 * have to use it and set it up to start the Cobalt clock
410 */
411 set_fixmap(FIX_APIC_BASE, APIC_DEFAULT_PHYS_BASE);
412 setup_local_APIC();
413 printk(KERN_INFO "Local APIC Version %#x, ID %#x\n",
414 (unsigned int)apic_read(APIC_LVR),
415 (unsigned int)apic_read(APIC_ID));
416
417 set_fixmap(FIX_CO_CPU, CO_CPU_PHYS);
418 set_fixmap(FIX_CO_APIC, CO_APIC_PHYS);
419 printk(KERN_INFO "Cobalt Revision %#lx, APIC ID %#lx\n",
420 co_cpu_read(CO_CPU_REV), co_apic_read(CO_APIC_ID));
421
422 /* Enable Cobalt APIC being careful to NOT change the ID! */
423 co_apic_write(CO_APIC_ID, co_apic_read(CO_APIC_ID) | CO_APIC_ENABLE);
424
425 printk(KERN_INFO "Cobalt APIC enabled: ID reg %#lx\n",
426 co_apic_read(CO_APIC_ID));
427}
428
429int __init visws_trap_init_quirk(void)
430{
431 lithium_init();
432 cobalt_init();
433
434 return 1;
435}
436
437/*
438 * IRQ controller / APIC support:
439 */
440
441static DEFINE_SPINLOCK(cobalt_lock);
442
443/*
444 * Set the given Cobalt APIC Redirection Table entry to point
445 * to the given IDT vector/index.
446 */
447static inline void co_apic_set(int entry, int irq)
448{
449 co_apic_write(CO_APIC_LO(entry), CO_APIC_LEVEL | (irq + FIRST_EXTERNAL_VECTOR));
450 co_apic_write(CO_APIC_HI(entry), 0);
451}
452
453/*
454 * Cobalt (IO)-APIC functions to handle PCI devices.
455 */
456static inline int co_apic_ide0_hack(void)
457{
458 extern char visws_board_type;
459 extern char visws_board_rev;
460
461 if (visws_board_type == VISWS_320 && visws_board_rev == 5)
462 return 5;
463 return CO_APIC_IDE0;
464}
465
466static int is_co_apic(unsigned int irq)
467{
468 if (IS_CO_APIC(irq))
469 return CO_APIC(irq);
470
471 switch (irq) {
472 case 0: return CO_APIC_CPU;
473 case CO_IRQ_IDE0: return co_apic_ide0_hack();
474 case CO_IRQ_IDE1: return CO_APIC_IDE1;
475 default: return -1;
476 }
477}
478
479
480/*
481 * This is the SGI Cobalt (IO-)APIC:
482 */
483
484static void enable_cobalt_irq(unsigned int irq)
485{
486 co_apic_set(is_co_apic(irq), irq);
487}
488
489static void disable_cobalt_irq(unsigned int irq)
490{
491 int entry = is_co_apic(irq);
492
493 co_apic_write(CO_APIC_LO(entry), CO_APIC_MASK);
494 co_apic_read(CO_APIC_LO(entry));
495}
496
497/*
498 * "irq" really just serves to identify the device. Here is where we
499 * map this to the Cobalt APIC entry where it's physically wired.
500 * This is called via request_irq -> setup_irq -> irq_desc->startup()
501 */
502static unsigned int startup_cobalt_irq(unsigned int irq)
503{
504 unsigned long flags;
505
506 spin_lock_irqsave(&cobalt_lock, flags);
507 if ((irq_desc[irq].status & (IRQ_DISABLED | IRQ_INPROGRESS | IRQ_WAITING)))
508 irq_desc[irq].status &= ~(IRQ_DISABLED | IRQ_INPROGRESS | IRQ_WAITING);
509 enable_cobalt_irq(irq);
510 spin_unlock_irqrestore(&cobalt_lock, flags);
511 return 0;
512}
513
514static void ack_cobalt_irq(unsigned int irq)
515{
516 unsigned long flags;
517
518 spin_lock_irqsave(&cobalt_lock, flags);
519 disable_cobalt_irq(irq);
520 apic_write(APIC_EOI, APIC_EIO_ACK);
521 spin_unlock_irqrestore(&cobalt_lock, flags);
522}
523
524static void end_cobalt_irq(unsigned int irq)
525{
526 unsigned long flags;
527
528 spin_lock_irqsave(&cobalt_lock, flags);
529 if (!(irq_desc[irq].status & (IRQ_DISABLED | IRQ_INPROGRESS)))
530 enable_cobalt_irq(irq);
531 spin_unlock_irqrestore(&cobalt_lock, flags);
532}
533
534static struct irq_chip cobalt_irq_type = {
535 .typename = "Cobalt-APIC",
536 .startup = startup_cobalt_irq,
537 .shutdown = disable_cobalt_irq,
538 .enable = enable_cobalt_irq,
539 .disable = disable_cobalt_irq,
540 .ack = ack_cobalt_irq,
541 .end = end_cobalt_irq,
542};
543
544
545/*
546 * This is the PIIX4-based 8259 that is wired up indirectly to Cobalt
548 * -- not in the manner expected by the code in i8259.c.
548 *
549 * there is a 'master' physical interrupt source that gets sent to
550 * the CPU. But in the chipset there are various 'virtual' interrupts
551 * waiting to be handled. We represent this to Linux through a 'master'
552 * interrupt controller type, and through a special virtual interrupt-
553 * controller. Device drivers only see the virtual interrupt sources.
554 */
555static unsigned int startup_piix4_master_irq(unsigned int irq)
556{
557 init_8259A(0);
558
559 return startup_cobalt_irq(irq);
560}
561
562static void end_piix4_master_irq(unsigned int irq)
563{
564 unsigned long flags;
565
566 spin_lock_irqsave(&cobalt_lock, flags);
567 enable_cobalt_irq(irq);
568 spin_unlock_irqrestore(&cobalt_lock, flags);
569}
570
571static struct irq_chip piix4_master_irq_type = {
572 .typename = "PIIX4-master",
573 .startup = startup_piix4_master_irq,
574 .ack = ack_cobalt_irq,
575 .end = end_piix4_master_irq,
576};
577
578
579static struct irq_chip piix4_virtual_irq_type = {
580 .typename = "PIIX4-virtual",
581 .shutdown = disable_8259A_irq,
582 .enable = enable_8259A_irq,
583 .disable = disable_8259A_irq,
584};
585
586
587/*
588 * PIIX4-8259 master/virtual functions to handle interrupt requests
589 * from legacy devices: floppy, parallel, serial, rtc.
590 *
591 * None of these get Cobalt APIC entries, neither do they have IDT
592 * entries. These interrupts are purely virtual and distributed from
593 * the 'master' interrupt source: CO_IRQ_8259.
594 *
595 * When the 8259 interrupts, its handler figures out which of these
596 * devices is interrupting and dispatches to that device's handler.
597 *
598 * CAREFUL: devices see the 'virtual' interrupt only. Thus disable/
599 * enable_irq gets the right irq. This 'master' irq is never directly
600 * manipulated by any driver.
601 */
602static irqreturn_t piix4_master_intr(int irq, void *dev_id)
603{
604 int realirq;
605 irq_desc_t *desc;
606 unsigned long flags;
607
608 spin_lock_irqsave(&i8259A_lock, flags);
609
610 /* Find out what's interrupting in the PIIX4 master 8259 */
611 outb(0x0c, 0x20); /* OCW3 Poll command */
612 realirq = inb(0x20);
613
614 /*
615 * Bit 7 == 0 means invalid/spurious
616 */
617 if (unlikely(!(realirq & 0x80)))
618 goto out_unlock;
619
620 realirq &= 7;
621
622 if (unlikely(realirq == 2)) {
623 outb(0x0c, 0xa0);
624 realirq = inb(0xa0);
625
626 if (unlikely(!(realirq & 0x80)))
627 goto out_unlock;
628
629 realirq = (realirq & 7) + 8;
630 }
631
632 /* mask and ack interrupt */
633 cached_irq_mask |= 1 << realirq;
634 if (unlikely(realirq > 7)) {
635 inb(0xa1);
636 outb(cached_slave_mask, 0xa1);
637 outb(0x60 + (realirq & 7), 0xa0);
638 outb(0x60 + 2, 0x20);
639 } else {
640 inb(0x21);
641 outb(cached_master_mask, 0x21);
642 outb(0x60 + realirq, 0x20);
643 }
644
645 spin_unlock_irqrestore(&i8259A_lock, flags);
646
647 desc = irq_desc + realirq;
648
649 /*
650 * handle this 'virtual interrupt' as a Cobalt one now.
651 */
652 kstat_cpu(smp_processor_id()).irqs[realirq]++;
653
654 if (likely(desc->action != NULL))
655 handle_IRQ_event(realirq, desc->action);
656
657 if (!(desc->status & IRQ_DISABLED))
658 enable_8259A_irq(realirq);
659
660 return IRQ_HANDLED;
661
662out_unlock:
663 spin_unlock_irqrestore(&i8259A_lock, flags);
664 return IRQ_NONE;
665}
666
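(A sketch of the 8259 poll protocol the handler above relies on; poll_8259
is a hypothetical user-space helper using the glibc <sys/io.h> accessors,
which require ioperm() and root:

	#include <sys/io.h>

	/* base is 0x20 for the master PIC, 0xa0 for the slave */
	static int poll_8259(unsigned short base)
	{
		unsigned char v;

		outb(0x0c, base);	/* OCW3: poll command */
		v = inb(base);		/* bit 7 set => valid */
		if (!(v & 0x80))
			return -1;	/* spurious */
		return v & 7;		/* bits 0-2: level */
	}

A result of 2 on the master means "cascade": the slave is polled next and
its level offset by 8, exactly as piix4_master_intr() does above.)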
667static struct irqaction master_action = {
668 .handler = piix4_master_intr,
669 .name = "PIIX4-8259",
670};
671
672static struct irqaction cascade_action = {
673 .handler = no_action,
674 .name = "cascade",
675};
676
677
678void init_VISWS_APIC_irqs(void)
679{
680 int i;
681
682 for (i = 0; i < CO_IRQ_APIC0 + CO_APIC_LAST + 1; i++) {
683 irq_desc[i].status = IRQ_DISABLED;
684 irq_desc[i].action = 0;
685 irq_desc[i].depth = 1;
686
687 if (i == 0) {
688 irq_desc[i].chip = &cobalt_irq_type;
689 }
690 else if (i == CO_IRQ_IDE0) {
691 irq_desc[i].chip = &cobalt_irq_type;
692 }
693 else if (i == CO_IRQ_IDE1) {
694 irq_desc[i].chip = &cobalt_irq_type;
695 }
696 else if (i == CO_IRQ_8259) {
697 irq_desc[i].chip = &piix4_master_irq_type;
698 }
699 else if (i < CO_IRQ_APIC0) {
700 irq_desc[i].chip = &piix4_virtual_irq_type;
701 }
702 else if (IS_CO_APIC(i)) {
703 irq_desc[i].chip = &cobalt_irq_type;
704 }
705 }
706
707 setup_irq(CO_IRQ_8259, &master_action);
708 setup_irq(2, &cascade_action);
709}
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index 956f38927aa7..b15346092b7b 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -151,7 +151,7 @@ static unsigned vmi_patch(u8 type, u16 clobbers, void *insns,
151 insns, ip); 151 insns, ip);
152 case PARAVIRT_PATCH(pv_cpu_ops.iret): 152 case PARAVIRT_PATCH(pv_cpu_ops.iret):
153 return patch_internal(VMI_CALL_IRET, len, insns, ip); 153 return patch_internal(VMI_CALL_IRET, len, insns, ip);
154 case PARAVIRT_PATCH(pv_cpu_ops.irq_enable_syscall_ret): 154 case PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit):
155 return patch_internal(VMI_CALL_SYSEXIT, len, insns, ip); 155 return patch_internal(VMI_CALL_SYSEXIT, len, insns, ip);
156 default: 156 default:
157 break; 157 break;
@@ -896,7 +896,7 @@ static inline int __init activate_vmi(void)
896 * the backend. They are performance critical anyway, so requiring 896 * the backend. They are performance critical anyway, so requiring
897 * a patch is not a big problem. 897 * a patch is not a big problem.
898 */ 898 */
899 pv_cpu_ops.irq_enable_syscall_ret = (void *)0xfeedbab0; 899 pv_cpu_ops.irq_enable_sysexit = (void *)0xfeedbab0;
900 pv_cpu_ops.iret = (void *)0xbadbab0; 900 pv_cpu_ops.iret = (void *)0xbadbab0;
901 901
902#ifdef CONFIG_SMP 902#ifdef CONFIG_SMP
@@ -932,7 +932,7 @@ static inline int __init activate_vmi(void)
932 pv_apic_ops.setup_secondary_clock = vmi_time_ap_init; 932 pv_apic_ops.setup_secondary_clock = vmi_time_ap_init;
933#endif 933#endif
934 pv_time_ops.sched_clock = vmi_sched_clock; 934 pv_time_ops.sched_clock = vmi_sched_clock;
935 pv_time_ops.get_cpu_khz = vmi_cpu_khz; 935 pv_time_ops.get_tsc_khz = vmi_tsc_khz;
936 936
937 /* We have true wallclock functions; disable CMOS clock sync */ 937 /* We have true wallclock functions; disable CMOS clock sync */
938 no_sync_cmos_clock = 1; 938 no_sync_cmos_clock = 1;
diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c
index a2b030780aa9..6953859fe289 100644
--- a/arch/x86/kernel/vmiclock_32.c
+++ b/arch/x86/kernel/vmiclock_32.c
@@ -33,8 +33,7 @@
33#include <asm/apic.h> 33#include <asm/apic.h>
34#include <asm/timer.h> 34#include <asm/timer.h>
35#include <asm/i8253.h> 35#include <asm/i8253.h>
36 36#include <asm/irq_vectors.h>
37#include <irq_vectors.h>
38 37
39#define VMI_ONESHOT (VMI_ALARM_IS_ONESHOT | VMI_CYCLES_REAL | vmi_get_alarm_wiring()) 38#define VMI_ONESHOT (VMI_ALARM_IS_ONESHOT | VMI_CYCLES_REAL | vmi_get_alarm_wiring())
40#define VMI_PERIODIC (VMI_ALARM_IS_PERIODIC | VMI_CYCLES_REAL | vmi_get_alarm_wiring()) 39#define VMI_PERIODIC (VMI_ALARM_IS_PERIODIC | VMI_CYCLES_REAL | vmi_get_alarm_wiring())
@@ -70,8 +69,8 @@ unsigned long long vmi_sched_clock(void)
70 return cycles_2_ns(vmi_timer_ops.get_cycle_counter(VMI_CYCLES_AVAILABLE)); 69 return cycles_2_ns(vmi_timer_ops.get_cycle_counter(VMI_CYCLES_AVAILABLE));
71} 70}
72 71
73/* paravirt_ops.get_cpu_khz = vmi_cpu_khz */ 72/* paravirt_ops.get_tsc_khz = vmi_tsc_khz */
74unsigned long vmi_cpu_khz(void) 73unsigned long vmi_tsc_khz(void)
75{ 74{
76 unsigned long long khz; 75 unsigned long long khz;
77 khz = vmi_timer_ops.get_cycle_frequency(); 76 khz = vmi_timer_ops.get_cycle_frequency();
diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S
index ce5ed083a1e9..2674f5796275 100644
--- a/arch/x86/kernel/vmlinux_32.lds.S
+++ b/arch/x86/kernel/vmlinux_32.lds.S
@@ -60,13 +60,6 @@ SECTIONS
60 60
61 BUG_TABLE :text 61 BUG_TABLE :text
62 62
63 . = ALIGN(4);
64 .tracedata : AT(ADDR(.tracedata) - LOAD_OFFSET) {
65 __tracedata_start = .;
66 *(.tracedata)
67 __tracedata_end = .;
68 }
69
70 RODATA 63 RODATA
71 64
72 /* writeable */ 65 /* writeable */
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S
index fad3674b06a5..fd246e22fe6b 100644
--- a/arch/x86/kernel/vmlinux_64.lds.S
+++ b/arch/x86/kernel/vmlinux_64.lds.S
@@ -53,13 +53,6 @@ SECTIONS
53 53
54 RODATA 54 RODATA
55 55
56 . = ALIGN(4);
57 .tracedata : AT(ADDR(.tracedata) - LOAD_OFFSET) {
58 __tracedata_start = .;
59 *(.tracedata)
60 __tracedata_end = .;
61 }
62
63 . = ALIGN(PAGE_SIZE); /* Align data segment to page size boundary */ 56 . = ALIGN(PAGE_SIZE); /* Align data segment to page size boundary */
64 /* Data */ 57 /* Data */
65 .data : AT(ADDR(.data) - LOAD_OFFSET) { 58 .data : AT(ADDR(.data) - LOAD_OFFSET) {
@@ -177,6 +170,7 @@ SECTIONS
177 *(.con_initcall.init) 170 *(.con_initcall.init)
178 } 171 }
179 __con_initcall_end = .; 172 __con_initcall_end = .;
173 . = ALIGN(16);
180 __x86cpuvendor_start = .; 174 __x86cpuvendor_start = .;
181 .x86cpuvendor.init : AT(ADDR(.x86cpuvendor.init) - LOAD_OFFSET) { 175 .x86cpuvendor.init : AT(ADDR(.x86cpuvendor.init) - LOAD_OFFSET) {
182 *(.x86cpuvendor.init) 176 *(.x86cpuvendor.init)
diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c
index ba8c0b75ab0a..0c029e8959c7 100644
--- a/arch/x86/kernel/vsmp_64.c
+++ b/arch/x86/kernel/vsmp_64.c
@@ -15,9 +15,12 @@
15#include <linux/init.h> 15#include <linux/init.h>
16#include <linux/pci_ids.h> 16#include <linux/pci_ids.h>
17#include <linux/pci_regs.h> 17#include <linux/pci_regs.h>
18
19#include <asm/apic.h>
18#include <asm/pci-direct.h> 20#include <asm/pci-direct.h>
19#include <asm/io.h> 21#include <asm/io.h>
20#include <asm/paravirt.h> 22#include <asm/paravirt.h>
23#include <asm/setup.h>
21 24
22#if defined CONFIG_PCI && defined CONFIG_PARAVIRT 25#if defined CONFIG_PCI && defined CONFIG_PARAVIRT
23/* 26/*
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 61efa2f7d564..c87cbd84c3e5 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -249,7 +249,7 @@ static ctl_table kernel_root_table2[] = {
249 doesn't violate that. We'll find out if it does. */ 249 doesn't violate that. We'll find out if it does. */
250static void __cpuinit vsyscall_set_cpu(int cpu) 250static void __cpuinit vsyscall_set_cpu(int cpu)
251{ 251{
252 unsigned long *d; 252 unsigned long d;
253 unsigned long node = 0; 253 unsigned long node = 0;
254#ifdef CONFIG_NUMA 254#ifdef CONFIG_NUMA
255 node = cpu_to_node(cpu); 255 node = cpu_to_node(cpu);
@@ -260,11 +260,11 @@ static void __cpuinit vsyscall_set_cpu(int cpu)
260 /* Store cpu number in limit so that it can be loaded quickly 260 /* Store cpu number in limit so that it can be loaded quickly
261 in user space in vgetcpu. 261 in user space in vgetcpu.
262 12 bits for the CPU and 8 bits for the node. */ 262 12 bits for the CPU and 8 bits for the node. */
263 d = (unsigned long *)(get_cpu_gdt_table(cpu) + GDT_ENTRY_PER_CPU); 263 d = 0x0f40000000000ULL;
264 *d = 0x0f40000000000ULL; 264 d |= cpu;
265 *d |= cpu; 265 d |= (node & 0xf) << 12;
266 *d |= (node & 0xf) << 12; 266 d |= (node >> 4) << 48;
267 *d |= (node >> 4) << 48; 267 write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_PER_CPU, &d, DESCTYPE_S);
268} 268}
269 269
270static void __cpuinit cpu_vsyscall_init(void *arg) 270static void __cpuinit cpu_vsyscall_init(void *arg)
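(Illustration of the descriptor packing in the new code, with hypothetical
cpu = 3 and node = 0x15, all math in unsigned long as in the patch:

	d  = 0x0f40000000000UL;		/* type/flags of the GDT entry */
	d |= 3;				/* limit[11:0]  = cpu */
	d |= (0x15UL & 0xf) << 12;	/* limit[15:12] = node low nibble */
	d |= (0x15UL >> 4) << 48;	/* limit[19:16] = node high nibble */

The 20-bit segment limit user space reads back with lsl is therefore
(node << 12) | cpu = 0x15003, which vgetcpu() splits as cpu = limit & 0xfff
and node = limit >> 12.)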
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
index f6c05d0410fb..2f306a826897 100644
--- a/arch/x86/kernel/x8664_ksyms_64.c
+++ b/arch/x86/kernel/x8664_ksyms_64.c
@@ -53,8 +53,3 @@ EXPORT_SYMBOL(init_level4_pgt);
53EXPORT_SYMBOL(load_gs_index); 53EXPORT_SYMBOL(load_gs_index);
54 54
55EXPORT_SYMBOL(_proxy_pda); 55EXPORT_SYMBOL(_proxy_pda);
56
57#ifdef CONFIG_PARAVIRT
58/* Virtualized guests may want to use it */
59EXPORT_SYMBOL_GPL(cpu_gdt_descr);
60#endif
diff --git a/arch/x86/lguest/Kconfig b/arch/x86/lguest/Kconfig
index 964dfa36d367..c70e12b1a637 100644
--- a/arch/x86/lguest/Kconfig
+++ b/arch/x86/lguest/Kconfig
@@ -3,7 +3,7 @@ config LGUEST_GUEST
3 select PARAVIRT 3 select PARAVIRT
4 depends on X86_32 4 depends on X86_32
5 depends on !X86_PAE 5 depends on !X86_PAE
6 depends on !(X86_VISWS || X86_VOYAGER) 6 depends on !X86_VOYAGER
7 select VIRTIO 7 select VIRTIO
8 select VIRTIO_RING 8 select VIRTIO_RING
9 select VIRTIO_CONSOLE 9 select VIRTIO_CONSOLE
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 5c7e2fd52075..50dad44fb542 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -607,7 +607,7 @@ static unsigned long lguest_get_wallclock(void)
607 * what speed it runs at, or 0 if it's unusable as a reliable clock source. 607 * what speed it runs at, or 0 if it's unusable as a reliable clock source.
608 * This matches what we want here: if we return 0 from this function, the x86 608 * This matches what we want here: if we return 0 from this function, the x86
609 * TSC clock will give up and not register itself. */ 609 * TSC clock will give up and not register itself. */
610static unsigned long lguest_cpu_khz(void) 610static unsigned long lguest_tsc_khz(void)
611{ 611{
612 return lguest_data.tsc_khz; 612 return lguest_data.tsc_khz;
613} 613}
@@ -835,7 +835,7 @@ static __init char *lguest_memory_setup(void)
835 835
836 /* The Linux bootloader header contains an "e820" memory map: the 836 /* The Linux bootloader header contains an "e820" memory map: the
837 * Launcher populated the first entry with our memory limit. */ 837 * Launcher populated the first entry with our memory limit. */
838 add_memory_region(boot_params.e820_map[0].addr, 838 e820_add_region(boot_params.e820_map[0].addr,
839 boot_params.e820_map[0].size, 839 boot_params.e820_map[0].size,
840 boot_params.e820_map[0].type); 840 boot_params.e820_map[0].type);
841 841
@@ -998,7 +998,7 @@ __init void lguest_init(void)
998 /* time operations */ 998 /* time operations */
999 pv_time_ops.get_wallclock = lguest_get_wallclock; 999 pv_time_ops.get_wallclock = lguest_get_wallclock;
1000 pv_time_ops.time_init = lguest_time_init; 1000 pv_time_ops.time_init = lguest_time_init;
1001 pv_time_ops.get_cpu_khz = lguest_cpu_khz; 1001 pv_time_ops.get_tsc_khz = lguest_tsc_khz;
1002 1002
1003 /* Now is a good time to look at the implementations of these functions 1003 /* Now is a good time to look at the implementations of these functions
1004 * before returning to the rest of lguest_init(). */ 1004 * before returning to the rest of lguest_init(). */
@@ -1012,6 +1012,7 @@ __init void lguest_init(void)
1012 * clobbered. The Launcher places our initial pagetables somewhere at 1012 * clobbered. The Launcher places our initial pagetables somewhere at
1013 * the top of our physical memory, so we don't need extra space: set 1013 * the top of our physical memory, so we don't need extra space: set
1014 * init_pg_tables_end to the end of the kernel. */ 1014 * init_pg_tables_end to the end of the kernel. */
1015 init_pg_tables_start = __pa(pg0);
1015 init_pg_tables_end = __pa(pg0); 1016 init_pg_tables_end = __pa(pg0);
1016 1017
1017 /* Load the %fs segment register (the per-cpu segment register) with 1018 /* Load the %fs segment register (the per-cpu segment register) with
@@ -1065,9 +1066,9 @@ __init void lguest_init(void)
1065 pm_power_off = lguest_power_off; 1066 pm_power_off = lguest_power_off;
1066 machine_ops.restart = lguest_restart; 1067 machine_ops.restart = lguest_restart;
1067 1068
1068 /* Now we're set up, call start_kernel() in init/main.c and we proceed 1069 /* Now we're set up, call i386_start_kernel() in head32.c and we proceed
1069 * to boot as normal. It never returns. */ 1070 * to boot as normal. It never returns. */
1070 start_kernel(); 1071 i386_start_kernel();
1071} 1072}
1072/* 1073/*
1073 * This marks the end of stage II of our journey, The Guest. 1074 * This marks the end of stage II of our journey, The Guest.
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 76f60f52a885..83226e0a7ce4 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -4,8 +4,8 @@
4 4
5obj-$(CONFIG_SMP) := msr-on-cpu.o 5obj-$(CONFIG_SMP) := msr-on-cpu.o
6 6
7lib-y := delay_$(BITS).o 7lib-y := delay.o
8lib-y += usercopy_$(BITS).o getuser_$(BITS).o putuser_$(BITS).o 8lib-y += usercopy_$(BITS).o getuser.o putuser.o
9lib-y += memcpy_$(BITS).o 9lib-y += memcpy_$(BITS).o
10 10
11ifeq ($(CONFIG_X86_32),y) 11ifeq ($(CONFIG_X86_32),y)
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index ee1c3f635157..dfdf428975c0 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -1,8 +1,10 @@
1/* Copyright 2002 Andi Kleen, SuSE Labs. 1/*
2 * Copyright 2008 Vitaly Mayatskikh <vmayatsk@redhat.com>
3 * Copyright 2002 Andi Kleen, SuSE Labs.
2 * Subject to the GNU Public License v2. 4 * Subject to the GNU Public License v2.
3 * 5 *
4 * Functions to copy from and to user space. 6 * Functions to copy from and to user space.
5 */ 7 */
6 8
7#include <linux/linkage.h> 9#include <linux/linkage.h>
8#include <asm/dwarf2.h> 10#include <asm/dwarf2.h>
@@ -20,60 +22,88 @@
20 .long \orig-1f /* by default jump to orig */ 22 .long \orig-1f /* by default jump to orig */
211: 231:
22 .section .altinstr_replacement,"ax" 24 .section .altinstr_replacement,"ax"
232: .byte 0xe9 /* near jump with 32bit immediate */ 252: .byte 0xe9 /* near jump with 32bit immediate */
24 .long \alt-1b /* offset */ /* or alternatively to alt */ 26 .long \alt-1b /* offset */ /* or alternatively to alt */
25 .previous 27 .previous
26 .section .altinstructions,"a" 28 .section .altinstructions,"a"
27 .align 8 29 .align 8
28 .quad 0b 30 .quad 0b
29 .quad 2b 31 .quad 2b
30 .byte \feature /* when feature is set */ 32 .byte \feature /* when feature is set */
31 .byte 5 33 .byte 5
32 .byte 5 34 .byte 5
33 .previous 35 .previous
34 .endm 36 .endm
35 37
36/* Standard copy_to_user with segment limit checking */ 38 .macro ALIGN_DESTINATION
39#ifdef FIX_ALIGNMENT
40 /* check for bad alignment of destination */
41 movl %edi,%ecx
42 andl $7,%ecx
43 jz 102f /* already aligned */
44 subl $8,%ecx
45 negl %ecx
46 subl %ecx,%edx
47100: movb (%rsi),%al
48101: movb %al,(%rdi)
49 incq %rsi
50 incq %rdi
51 decl %ecx
52 jnz 100b
53102:
54 .section .fixup,"ax"
55103: addl %r8d,%edx /* ecx is zerorest also */
56 jmp copy_user_handle_tail
57 .previous
58
59 .section __ex_table,"a"
60 .align 8
61 .quad 100b,103b
62 .quad 101b,103b
63 .previous
64#endif
65 .endm
66
67/* Standard copy_to_user with segment limit checking */
37ENTRY(copy_to_user) 68ENTRY(copy_to_user)
38 CFI_STARTPROC 69 CFI_STARTPROC
39 GET_THREAD_INFO(%rax) 70 GET_THREAD_INFO(%rax)
40 movq %rdi,%rcx 71 movq %rdi,%rcx
41 addq %rdx,%rcx 72 addq %rdx,%rcx
42 jc bad_to_user 73 jc bad_to_user
43 cmpq threadinfo_addr_limit(%rax),%rcx 74 cmpq TI_addr_limit(%rax),%rcx
44 jae bad_to_user 75 jae bad_to_user
45 xorl %eax,%eax /* clear zero flag */
46 ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string 76 ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
47 CFI_ENDPROC 77 CFI_ENDPROC
48 78
49ENTRY(copy_user_generic) 79/* Standard copy_from_user with segment limit checking */
80ENTRY(copy_from_user)
50 CFI_STARTPROC 81 CFI_STARTPROC
51 movl $1,%ecx /* set zero flag */ 82 GET_THREAD_INFO(%rax)
83 movq %rsi,%rcx
84 addq %rdx,%rcx
85 jc bad_from_user
86 cmpq TI_addr_limit(%rax),%rcx
87 jae bad_from_user
52 ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string 88 ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
53 CFI_ENDPROC 89 CFI_ENDPROC
90ENDPROC(copy_from_user)
54 91
55ENTRY(__copy_from_user_inatomic) 92ENTRY(copy_user_generic)
56 CFI_STARTPROC 93 CFI_STARTPROC
57 xorl %ecx,%ecx /* clear zero flag */
58 ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string 94 ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
59 CFI_ENDPROC 95 CFI_ENDPROC
96ENDPROC(copy_user_generic)
60 97
61/* Standard copy_from_user with segment limit checking */ 98ENTRY(__copy_from_user_inatomic)
62ENTRY(copy_from_user)
63 CFI_STARTPROC 99 CFI_STARTPROC
64 GET_THREAD_INFO(%rax)
65 movq %rsi,%rcx
66 addq %rdx,%rcx
67 jc bad_from_user
68 cmpq threadinfo_addr_limit(%rax),%rcx
69 jae bad_from_user
70 movl $1,%ecx /* set zero flag */
71 ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string 100 ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
72 CFI_ENDPROC 101 CFI_ENDPROC
73ENDPROC(copy_from_user) 102ENDPROC(__copy_from_user_inatomic)
74 103
75 .section .fixup,"ax" 104 .section .fixup,"ax"
76 /* must zero dest */ 105 /* must zero dest */
106ENTRY(bad_from_user)
77bad_from_user: 107bad_from_user:
78 CFI_STARTPROC 108 CFI_STARTPROC
79 movl %edx,%ecx 109 movl %edx,%ecx
@@ -81,271 +111,158 @@ bad_from_user:
81 rep 111 rep
82 stosb 112 stosb
83bad_to_user: 113bad_to_user:
84 movl %edx,%eax 114 movl %edx,%eax
85 ret 115 ret
86 CFI_ENDPROC 116 CFI_ENDPROC
87END(bad_from_user) 117ENDPROC(bad_from_user)
88 .previous 118 .previous
89 119
90
91/* 120/*
92 * copy_user_generic_unrolled - memory copy with exception handling. 121 * copy_user_generic_unrolled - memory copy with exception handling.
93 * This version is for CPUs like P4 that don't have efficient micro code for rep movsq 122 * This version is for CPUs like P4 that don't have efficient micro
94 * 123 * code for rep movsq
95 * Input: 124 *
125 * Input:
96 * rdi destination 126 * rdi destination
97 * rsi source 127 * rsi source
98 * rdx count 128 * rdx count
99 * ecx zero flag -- if true zero destination on error
100 * 129 *
101 * Output: 130 * Output:
102 * eax uncopied bytes or 0 if successful. 131 * eax uncopied bytes or 0 if successful.
103 */ 132 */
104ENTRY(copy_user_generic_unrolled) 133ENTRY(copy_user_generic_unrolled)
105 CFI_STARTPROC 134 CFI_STARTPROC
106 pushq %rbx 135 cmpl $8,%edx
107 CFI_ADJUST_CFA_OFFSET 8 136 jb 20f /* less than 8 bytes, go to byte copy loop */
108 CFI_REL_OFFSET rbx, 0 137 ALIGN_DESTINATION
109 pushq %rcx 138 movl %edx,%ecx
110 CFI_ADJUST_CFA_OFFSET 8 139 andl $63,%edx
111 CFI_REL_OFFSET rcx, 0 140 shrl $6,%ecx
112 xorl %eax,%eax /*zero for the exception handler */ 141 jz 17f
113 1421: movq (%rsi),%r8
114#ifdef FIX_ALIGNMENT 1432: movq 1*8(%rsi),%r9
115 /* check for bad alignment of destination */ 1443: movq 2*8(%rsi),%r10
116 movl %edi,%ecx 1454: movq 3*8(%rsi),%r11
117 andl $7,%ecx 1465: movq %r8,(%rdi)
118 jnz .Lbad_alignment 1476: movq %r9,1*8(%rdi)
119.Lafter_bad_alignment: 1487: movq %r10,2*8(%rdi)
120#endif 1498: movq %r11,3*8(%rdi)
121 1509: movq 4*8(%rsi),%r8
122 movq %rdx,%rcx 15110: movq 5*8(%rsi),%r9
123 15211: movq 6*8(%rsi),%r10
124 movl $64,%ebx 15312: movq 7*8(%rsi),%r11
125 shrq $6,%rdx 15413: movq %r8,4*8(%rdi)
126 decq %rdx 15514: movq %r9,5*8(%rdi)
127 js .Lhandle_tail 15615: movq %r10,6*8(%rdi)
128 15716: movq %r11,7*8(%rdi)
129 .p2align 4
130.Lloop:
131.Ls1: movq (%rsi),%r11
132.Ls2: movq 1*8(%rsi),%r8
133.Ls3: movq 2*8(%rsi),%r9
134.Ls4: movq 3*8(%rsi),%r10
135.Ld1: movq %r11,(%rdi)
136.Ld2: movq %r8,1*8(%rdi)
137.Ld3: movq %r9,2*8(%rdi)
138.Ld4: movq %r10,3*8(%rdi)
139
140.Ls5: movq 4*8(%rsi),%r11
141.Ls6: movq 5*8(%rsi),%r8
142.Ls7: movq 6*8(%rsi),%r9
143.Ls8: movq 7*8(%rsi),%r10
144.Ld5: movq %r11,4*8(%rdi)
145.Ld6: movq %r8,5*8(%rdi)
146.Ld7: movq %r9,6*8(%rdi)
147.Ld8: movq %r10,7*8(%rdi)
148
149 decq %rdx
150
151 leaq 64(%rsi),%rsi 158 leaq 64(%rsi),%rsi
152 leaq 64(%rdi),%rdi 159 leaq 64(%rdi),%rdi
153
154 jns .Lloop
155
156 .p2align 4
157.Lhandle_tail:
158 movl %ecx,%edx
159 andl $63,%ecx
160 shrl $3,%ecx
161 jz .Lhandle_7
162 movl $8,%ebx
163 .p2align 4
164.Lloop_8:
165.Ls9: movq (%rsi),%r8
166.Ld9: movq %r8,(%rdi)
167 decl %ecx 160 decl %ecx
168 leaq 8(%rdi),%rdi 161 jnz 1b
16217: movl %edx,%ecx
163 andl $7,%edx
164 shrl $3,%ecx
165 jz 20f
16618: movq (%rsi),%r8
16719: movq %r8,(%rdi)
169 leaq 8(%rsi),%rsi 168 leaq 8(%rsi),%rsi
170 jnz .Lloop_8 169 leaq 8(%rdi),%rdi
171 170 decl %ecx
172.Lhandle_7: 171 jnz 18b
17220: andl %edx,%edx
173 jz 23f
173 movl %edx,%ecx 174 movl %edx,%ecx
174 andl $7,%ecx 17521: movb (%rsi),%al
175 jz .Lende 17622: movb %al,(%rdi)
176 .p2align 4
177.Lloop_1:
178.Ls10: movb (%rsi),%bl
179.Ld10: movb %bl,(%rdi)
180 incq %rdi
181 incq %rsi 177 incq %rsi
178 incq %rdi
182 decl %ecx 179 decl %ecx
183 jnz .Lloop_1 180 jnz 21b
184 18123: xor %eax,%eax
185 CFI_REMEMBER_STATE
186.Lende:
187 popq %rcx
188 CFI_ADJUST_CFA_OFFSET -8
189 CFI_RESTORE rcx
190 popq %rbx
191 CFI_ADJUST_CFA_OFFSET -8
192 CFI_RESTORE rbx
193 ret 182 ret
194 CFI_RESTORE_STATE
195 183
196#ifdef FIX_ALIGNMENT 184 .section .fixup,"ax"
197 /* align destination */ 18530: shll $6,%ecx
198 .p2align 4 186 addl %ecx,%edx
199.Lbad_alignment: 187 jmp 60f
200 movl $8,%r9d 18840: lea (%rdx,%rcx,8),%rdx
201 subl %ecx,%r9d 189 jmp 60f
202 movl %r9d,%ecx 19050: movl %ecx,%edx
203 cmpq %r9,%rdx 19160: jmp copy_user_handle_tail /* ecx is zerorest also */
204 jz .Lhandle_7 192 .previous
205 js .Lhandle_7
206.Lalign_1:
207.Ls11: movb (%rsi),%bl
208.Ld11: movb %bl,(%rdi)
209 incq %rsi
210 incq %rdi
211 decl %ecx
212 jnz .Lalign_1
213 subq %r9,%rdx
214 jmp .Lafter_bad_alignment
215#endif
216 193
217 /* table sorted by exception address */
218 .section __ex_table,"a" 194 .section __ex_table,"a"
219 .align 8 195 .align 8
220 .quad .Ls1,.Ls1e /* Ls1-Ls4 have copied zero bytes */ 196 .quad 1b,30b
221 .quad .Ls2,.Ls1e 197 .quad 2b,30b
222 .quad .Ls3,.Ls1e 198 .quad 3b,30b
223 .quad .Ls4,.Ls1e 199 .quad 4b,30b
224 .quad .Ld1,.Ls1e /* Ld1-Ld4 have copied 0-24 bytes */ 200 .quad 5b,30b
225 .quad .Ld2,.Ls2e 201 .quad 6b,30b
226 .quad .Ld3,.Ls3e 202 .quad 7b,30b
227 .quad .Ld4,.Ls4e 203 .quad 8b,30b
228 .quad .Ls5,.Ls5e /* Ls5-Ls8 have copied 32 bytes */ 204 .quad 9b,30b
229 .quad .Ls6,.Ls5e 205 .quad 10b,30b
230 .quad .Ls7,.Ls5e 206 .quad 11b,30b
231 .quad .Ls8,.Ls5e 207 .quad 12b,30b
232 .quad .Ld5,.Ls5e /* Ld5-Ld8 have copied 32-56 bytes */ 208 .quad 13b,30b
233 .quad .Ld6,.Ls6e 209 .quad 14b,30b
234 .quad .Ld7,.Ls7e 210 .quad 15b,30b
235 .quad .Ld8,.Ls8e 211 .quad 16b,30b
236 .quad .Ls9,.Le_quad 212 .quad 18b,40b
237 .quad .Ld9,.Le_quad 213 .quad 19b,40b
238 .quad .Ls10,.Le_byte 214 .quad 21b,50b
239 .quad .Ld10,.Le_byte 215 .quad 22b,50b
240#ifdef FIX_ALIGNMENT
241 .quad .Ls11,.Lzero_rest
242 .quad .Ld11,.Lzero_rest
243#endif
244 .quad .Le5,.Le_zero
245 .previous 216 .previous
246
247 /* eax: zero, ebx: 64 */
248.Ls1e: addl $8,%eax /* eax is bytes left uncopied within the loop (Ls1e: 64 .. Ls8e: 8) */
249.Ls2e: addl $8,%eax
250.Ls3e: addl $8,%eax
251.Ls4e: addl $8,%eax
252.Ls5e: addl $8,%eax
253.Ls6e: addl $8,%eax
254.Ls7e: addl $8,%eax
255.Ls8e: addl $8,%eax
256 addq %rbx,%rdi /* +64 */
257 subq %rax,%rdi /* correct destination with computed offset */
258
259 shlq $6,%rdx /* loop counter * 64 (stride length) */
260 addq %rax,%rdx /* add offset to loopcnt */
261 andl $63,%ecx /* remaining bytes */
262 addq %rcx,%rdx /* add them */
263 jmp .Lzero_rest
264
265 /* exception on quad word loop in tail handling */
266 /* ecx: loopcnt/8, %edx: length, rdi: correct */
267.Le_quad:
268 shll $3,%ecx
269 andl $7,%edx
270 addl %ecx,%edx
271 /* edx: bytes to zero, rdi: dest, eax:zero */
272.Lzero_rest:
273 cmpl $0,(%rsp)
274 jz .Le_zero
275 movq %rdx,%rcx
276.Le_byte:
277 xorl %eax,%eax
278.Le5: rep
279 stosb
280 /* when there is another exception while zeroing the rest just return */
281.Le_zero:
282 movq %rdx,%rax
283 jmp .Lende
284 CFI_ENDPROC 217 CFI_ENDPROC
285ENDPROC(copy_user_generic) 218ENDPROC(copy_user_generic_unrolled)
286 219
287 220/* Some CPUs run faster using the string copy instructions.
288 /* Some CPUs run faster using the string copy instructions. 221 * This is also a lot simpler. Use them when possible.
289 This is also a lot simpler. Use them when possible. 222 *
290 Patch in jmps to this code instead of copying it fully 223 * Only 4GB of copy is supported. This shouldn't be a problem
291 to avoid unwanted aliasing in the exception tables. */ 224 * because the kernel normally only writes from/to page sized chunks
292 225 * even if user space passed a longer buffer.
293 /* rdi destination 226 * And more would be dangerous because both Intel and AMD have
294 * rsi source 227 * errata with rep movsq > 4GB. If someone feels the need to fix
295 * rdx count 228 * this please consider this.
296 * ecx zero flag 229 *
297 * 230 * Input:
298 * Output: 231 * rdi destination
299 * eax uncopied bytes or 0 if successful. 232 rsi source
300 * 233 * rdx count
301 * Only 4GB of copy is supported. This shouldn't be a problem 234 *
302 * because the kernel normally only writes from/to page sized chunks 235 * Output:
303 * even if user space passed a longer buffer. 236 * eax uncopied bytes or 0 if successful.
304 * And more would be dangerous because both Intel and AMD have 237 */
305 * errata with rep movsq > 4GB. If someone feels the need to fix
306 * this please consider this.
307 */
308ENTRY(copy_user_generic_string) 238ENTRY(copy_user_generic_string)
309 CFI_STARTPROC 239 CFI_STARTPROC
310 movl %ecx,%r8d /* save zero flag */ 240 andl %edx,%edx
241 jz 4f
242 cmpl $8,%edx
243 jb 2f /* less than 8 bytes, go to byte copy loop */
244 ALIGN_DESTINATION
311 movl %edx,%ecx 245 movl %edx,%ecx
312 shrl $3,%ecx 246 shrl $3,%ecx
313 andl $7,%edx 247 andl $7,%edx
314 jz 10f 2481: rep
3151: rep
316 movsq
317 movl %edx,%ecx
3182: rep
319 movsb
3209: movl %ecx,%eax
321 ret
322
323 /* multiple of 8 byte */
32410: rep
325 movsq 249 movsq
326 xor %eax,%eax 2502: movl %edx,%ecx
2513: rep
252 movsb
2534: xorl %eax,%eax
327 ret 254 ret
328 255
329 /* exception handling */ 256 .section .fixup,"ax"
3303: lea (%rdx,%rcx,8),%rax /* exception on quad loop */ 25711: lea (%rdx,%rcx,8),%rcx
331 jmp 6f 25812: movl %ecx,%edx /* ecx is zerorest also */
3325: movl %ecx,%eax /* exception on byte loop */ 259 jmp copy_user_handle_tail
333 /* eax: left over bytes */ 260 .previous
3346: testl %r8d,%r8d /* zero flag set? */
335 jz 7f
336 movl %eax,%ecx /* initialize x86 loop counter */
337 push %rax
338 xorl %eax,%eax
3398: rep
340 stosb /* zero the rest */
34111: pop %rax
3427: ret
343 CFI_ENDPROC
344END(copy_user_generic_c)
345 261
346 .section __ex_table,"a" 262 .section __ex_table,"a"
347 .quad 1b,3b 263 .align 8
348 .quad 2b,5b 264 .quad 1b,11b
349 .quad 8b,11b 265 .quad 3b,12b
350 .quad 10b,3b
351 .previous 266 .previous
267 CFI_ENDPROC
268ENDPROC(copy_user_generic_string)
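(Two bits of arithmetic in the new code are worth spelling out. In
ALIGN_DESTINATION, a destination with low address bits 5 gives ecx = 5;
subl $8 / negl turn that into 3, so three bytes are copied to reach the
next 8-byte boundary and edx shrinks accordingly. In the fixup section, a
fault inside the 64-byte unrolled loop leaves %ecx holding the blocks still
outstanding and %edx the sub-64 remainder, so label 30 rebuilds the
uncopied count as ecx*64 + edx -- e.g. a 200-byte copy (ecx = 3, edx = 8)
faulting with ecx == 2 reports 2*64 + 8 = 136 bytes -- before handing off
to copy_user_handle_tail(); label 40 does the same for the 8-byte loop via
lea (%rdx,%rcx,8).)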
diff --git a/arch/x86/lib/copy_user_nocache_64.S b/arch/x86/lib/copy_user_nocache_64.S
index 9d3d1ab83763..40e0e309d27e 100644
--- a/arch/x86/lib/copy_user_nocache_64.S
+++ b/arch/x86/lib/copy_user_nocache_64.S
@@ -1,4 +1,6 @@
1/* Copyright 2002 Andi Kleen, SuSE Labs. 1/*
2 * Copyright 2008 Vitaly Mayatskikh <vmayatsk@redhat.com>
3 * Copyright 2002 Andi Kleen, SuSE Labs.
2 * Subject to the GNU Public License v2. 4 * Subject to the GNU Public License v2.
3 * 5 *
4 * Functions to copy from and to user space. 6 * Functions to copy from and to user space.
@@ -12,204 +14,125 @@
12#include <asm/current.h> 14#include <asm/current.h>
13#include <asm/asm-offsets.h> 15#include <asm/asm-offsets.h>
14#include <asm/thread_info.h> 16#include <asm/thread_info.h>
15#include <asm/cpufeature.h>
16
17/*
18 * copy_user_nocache - Uncached memory copy with exception handling
19 * This will force destination/source out of cache for more performance.
20 *
21 * Input:
22 * rdi destination
23 * rsi source
24 * rdx count
25 * rcx zero flag when 1 zero on exception
26 *
27 * Output:
28 * eax uncopied bytes or 0 if successful.
29 */
30ENTRY(__copy_user_nocache)
31 CFI_STARTPROC
32 pushq %rbx
33 CFI_ADJUST_CFA_OFFSET 8
34 CFI_REL_OFFSET rbx, 0
35 pushq %rcx /* save zero flag */
36 CFI_ADJUST_CFA_OFFSET 8
37 CFI_REL_OFFSET rcx, 0
38
39 xorl %eax,%eax /* zero for the exception handler */
40 17
18 .macro ALIGN_DESTINATION
41#ifdef FIX_ALIGNMENT 19#ifdef FIX_ALIGNMENT
42 /* check for bad alignment of destination */ 20 /* check for bad alignment of destination */
43 movl %edi,%ecx 21 movl %edi,%ecx
44 andl $7,%ecx 22 andl $7,%ecx
45 jnz .Lbad_alignment 23 jz 102f /* already aligned */
46.Lafter_bad_alignment: 24 subl $8,%ecx
47#endif 25 negl %ecx
48 26 subl %ecx,%edx
49 movq %rdx,%rcx 27100: movb (%rsi),%al
50 28101: movb %al,(%rdi)
51 movl $64,%ebx 29 incq %rsi
52 shrq $6,%rdx 30 incq %rdi
53 decq %rdx 31 decl %ecx
54 js .Lhandle_tail 32 jnz 100b
55 33102:
56 .p2align 4 34 .section .fixup,"ax"
57.Lloop: 35103: addl %r8d,%edx /* ecx is zerorest also */
58.Ls1: movq (%rsi),%r11 36 jmp copy_user_handle_tail
59.Ls2: movq 1*8(%rsi),%r8 37 .previous
60.Ls3: movq 2*8(%rsi),%r9
61.Ls4: movq 3*8(%rsi),%r10
62.Ld1: movnti %r11,(%rdi)
63.Ld2: movnti %r8,1*8(%rdi)
64.Ld3: movnti %r9,2*8(%rdi)
65.Ld4: movnti %r10,3*8(%rdi)
66
67.Ls5: movq 4*8(%rsi),%r11
68.Ls6: movq 5*8(%rsi),%r8
69.Ls7: movq 6*8(%rsi),%r9
70.Ls8: movq 7*8(%rsi),%r10
71.Ld5: movnti %r11,4*8(%rdi)
72.Ld6: movnti %r8,5*8(%rdi)
73.Ld7: movnti %r9,6*8(%rdi)
74.Ld8: movnti %r10,7*8(%rdi)
75 38
76 dec %rdx 39 .section __ex_table,"a"
40 .align 8
41 .quad 100b,103b
42 .quad 101b,103b
43 .previous
44#endif
45 .endm
77 46
47/*
48 * copy_user_nocache - Uncached memory copy with exception handling
49 * This will force destination/source out of cache for more performance.
50 */
51ENTRY(__copy_user_nocache)
52 CFI_STARTPROC
53 cmpl $8,%edx
54 jb 20f /* less than 8 bytes, go to byte copy loop */
55 ALIGN_DESTINATION
56 movl %edx,%ecx
57 andl $63,%edx
58 shrl $6,%ecx
59 jz 17f
601: movq (%rsi),%r8
612: movq 1*8(%rsi),%r9
623: movq 2*8(%rsi),%r10
634: movq 3*8(%rsi),%r11
645: movnti %r8,(%rdi)
656: movnti %r9,1*8(%rdi)
667: movnti %r10,2*8(%rdi)
678: movnti %r11,3*8(%rdi)
689: movq 4*8(%rsi),%r8
6910: movq 5*8(%rsi),%r9
7011: movq 6*8(%rsi),%r10
7112: movq 7*8(%rsi),%r11
7213: movnti %r8,4*8(%rdi)
7314: movnti %r9,5*8(%rdi)
7415: movnti %r10,6*8(%rdi)
7516: movnti %r11,7*8(%rdi)
78 leaq 64(%rsi),%rsi 76 leaq 64(%rsi),%rsi
79 leaq 64(%rdi),%rdi 77 leaq 64(%rdi),%rdi
80
81 jns .Lloop
82
83 .p2align 4
84.Lhandle_tail:
85 movl %ecx,%edx
86 andl $63,%ecx
87 shrl $3,%ecx
88 jz .Lhandle_7
89 movl $8,%ebx
90 .p2align 4
91.Lloop_8:
92.Ls9: movq (%rsi),%r8
93.Ld9: movnti %r8,(%rdi)
94 decl %ecx 78 decl %ecx
95 leaq 8(%rdi),%rdi 79 jnz 1b
8017: movl %edx,%ecx
81 andl $7,%edx
82 shrl $3,%ecx
83 jz 20f
8418: movq (%rsi),%r8
8519: movnti %r8,(%rdi)
96 leaq 8(%rsi),%rsi 86 leaq 8(%rsi),%rsi
97 jnz .Lloop_8 87 leaq 8(%rdi),%rdi
98 88 decl %ecx
99.Lhandle_7: 89 jnz 18b
9020: andl %edx,%edx
91 jz 23f
100 movl %edx,%ecx 92 movl %edx,%ecx
101 andl $7,%ecx 9321: movb (%rsi),%al
102 jz .Lende 9422: movb %al,(%rdi)
103 .p2align 4
104.Lloop_1:
105.Ls10: movb (%rsi),%bl
106.Ld10: movb %bl,(%rdi)
107 incq %rdi
108 incq %rsi 95 incq %rsi
96 incq %rdi
109 decl %ecx 97 decl %ecx
110 jnz .Lloop_1 98 jnz 21b
111 9923: xorl %eax,%eax
112 CFI_REMEMBER_STATE
113.Lende:
114 popq %rcx
115 CFI_ADJUST_CFA_OFFSET -8
116 CFI_RESTORE %rcx
117 popq %rbx
118 CFI_ADJUST_CFA_OFFSET -8
119 CFI_RESTORE rbx
120 sfence 100 sfence
121 ret 101 ret
122 CFI_RESTORE_STATE
123 102
124#ifdef FIX_ALIGNMENT 103 .section .fixup,"ax"
125 /* align destination */ 10430: shll $6,%ecx
126 .p2align 4 105 addl %ecx,%edx
127.Lbad_alignment: 106 jmp 60f
128 movl $8,%r9d 10740: lea (%rdx,%rcx,8),%rdx
129 subl %ecx,%r9d 108 jmp 60f
130 movl %r9d,%ecx 10950: movl %ecx,%edx
131 cmpq %r9,%rdx 11060: sfence
132 jz .Lhandle_7 111 movl %r8d,%ecx
133 js .Lhandle_7 112 jmp copy_user_handle_tail
134.Lalign_1: 113 .previous
135.Ls11: movb (%rsi),%bl
136.Ld11: movb %bl,(%rdi)
137 incq %rsi
138 incq %rdi
139 decl %ecx
140 jnz .Lalign_1
141 subq %r9,%rdx
142 jmp .Lafter_bad_alignment
143#endif
144 114
145 /* table sorted by exception address */
146 .section __ex_table,"a" 115 .section __ex_table,"a"
147 .align 8 116 .quad 1b,30b
148 .quad .Ls1,.Ls1e /* .Ls[1-4] - 0 bytes copied */ 117 .quad 2b,30b
149 .quad .Ls2,.Ls1e 118 .quad 3b,30b
150 .quad .Ls3,.Ls1e 119 .quad 4b,30b
151 .quad .Ls4,.Ls1e 120 .quad 5b,30b
152 .quad .Ld1,.Ls1e /* .Ld[1-4] - 0..24 bytes coped */ 121 .quad 6b,30b
153 .quad .Ld2,.Ls2e 122 .quad 7b,30b
154 .quad .Ld3,.Ls3e 123 .quad 8b,30b
155 .quad .Ld4,.Ls4e 124 .quad 9b,30b
156 .quad .Ls5,.Ls5e /* .Ls[5-8] - 32 bytes copied */ 125 .quad 10b,30b
157 .quad .Ls6,.Ls5e 126 .quad 11b,30b
158 .quad .Ls7,.Ls5e 127 .quad 12b,30b
159 .quad .Ls8,.Ls5e 128 .quad 13b,30b
160 .quad .Ld5,.Ls5e /* .Ld[5-8] - 32..56 bytes copied */ 129 .quad 14b,30b
161 .quad .Ld6,.Ls6e 130 .quad 15b,30b
162 .quad .Ld7,.Ls7e 131 .quad 16b,30b
163 .quad .Ld8,.Ls8e 132 .quad 18b,40b
164 .quad .Ls9,.Le_quad 133 .quad 19b,40b
165 .quad .Ld9,.Le_quad 134 .quad 21b,50b
166 .quad .Ls10,.Le_byte 135 .quad 22b,50b
167 .quad .Ld10,.Le_byte
168#ifdef FIX_ALIGNMENT
169 .quad .Ls11,.Lzero_rest
170 .quad .Ld11,.Lzero_rest
171#endif
172 .quad .Le5,.Le_zero
173 .previous 136 .previous
174
175 /* eax: zero, ebx: 64 */
176.Ls1e: addl $8,%eax /* eax: bytes left uncopied: Ls1e: 64 .. Ls8e: 8 */
177.Ls2e: addl $8,%eax
178.Ls3e: addl $8,%eax
179.Ls4e: addl $8,%eax
180.Ls5e: addl $8,%eax
181.Ls6e: addl $8,%eax
182.Ls7e: addl $8,%eax
183.Ls8e: addl $8,%eax
184 addq %rbx,%rdi /* +64 */
185 subq %rax,%rdi /* correct destination with computed offset */
186
187 shlq $6,%rdx /* loop counter * 64 (stride length) */
188 addq %rax,%rdx /* add offset to loopcnt */
189 andl $63,%ecx /* remaining bytes */
190 addq %rcx,%rdx /* add them */
191 jmp .Lzero_rest
192
193 /* exception on quad word loop in tail handling */
194 /* ecx: loopcnt/8, %edx: length, rdi: correct */
195.Le_quad:
196 shll $3,%ecx
197 andl $7,%edx
198 addl %ecx,%edx
199 /* edx: bytes to zero, rdi: dest, eax:zero */
200.Lzero_rest:
201 cmpl $0,(%rsp) /* zero flag set? */
202 jz .Le_zero
203 movq %rdx,%rcx
204.Le_byte:
205 xorl %eax,%eax
206.Le5: rep
207 stosb
208 /* when there is another exception while zeroing the rest just return */
209.Le_zero:
210 movq %rdx,%rax
211 jmp .Lende
212 CFI_ENDPROC 137 CFI_ENDPROC
213ENDPROC(__copy_user_nocache) 138ENDPROC(__copy_user_nocache)
214
215
diff --git a/arch/x86/lib/delay_32.c b/arch/x86/lib/delay.c
index d710f2d167bb..f4568605d7d5 100644
--- a/arch/x86/lib/delay_32.c
+++ b/arch/x86/lib/delay.c
@@ -3,6 +3,7 @@
3 * 3 *
4 * Copyright (C) 1993 Linus Torvalds 4 * Copyright (C) 1993 Linus Torvalds
5 * Copyright (C) 1997 Martin Mares <mj@atrey.karlin.mff.cuni.cz> 5 * Copyright (C) 1997 Martin Mares <mj@atrey.karlin.mff.cuni.cz>
6 * Copyright (C) 2008 Jiri Hladky <hladky _dot_ jiri _at_ gmail _dot_ com>
6 * 7 *
7 * The __delay function must _NOT_ be inlined as its execution time 8 * The __delay function must _NOT_ be inlined as its execution time
8 * depends wildly on alignment on many x86 processors. The additional 9 * depends wildly on alignment on many x86 processors. The additional
@@ -28,16 +29,22 @@
28/* simple loop based delay: */ 29/* simple loop based delay: */
29static void delay_loop(unsigned long loops) 30static void delay_loop(unsigned long loops)
30{ 31{
31 int d0; 32 asm volatile(
32 33 " test %0,%0 \n"
33 __asm__ __volatile__( 34 " jz 3f \n"
34 "\tjmp 1f\n" 35 " jmp 1f \n"
35 ".align 16\n" 36
36 "1:\tjmp 2f\n" 37 ".align 16 \n"
37 ".align 16\n" 38 "1: jmp 2f \n"
38 "2:\tdecl %0\n\tjns 2b" 39
39 :"=&a" (d0) 40 ".align 16 \n"
40 :"0" (loops)); 41 "2: dec %0 \n"
42 " jnz 2b \n"
43 "3: dec %0 \n"
44
45 : /* we don't need output */
46 :"a" (loops)
47 );
41} 48}
42 49
43/* TSC based delay: */ 50/* TSC based delay: */
@@ -91,7 +98,7 @@ void use_tsc_delay(void)
91int __devinit read_current_timer(unsigned long *timer_val) 98int __devinit read_current_timer(unsigned long *timer_val)
92{ 99{
93 if (delay_fn == delay_tsc) { 100 if (delay_fn == delay_tsc) {
94 rdtscl(*timer_val); 101 rdtscll(*timer_val);
95 return 0; 102 return 0;
96 } 103 }
97 return -1; 104 return -1;
@@ -101,31 +108,30 @@ void __delay(unsigned long loops)
101{ 108{
102 delay_fn(loops); 109 delay_fn(loops);
103} 110}
111EXPORT_SYMBOL(__delay);
104 112
105inline void __const_udelay(unsigned long xloops) 113inline void __const_udelay(unsigned long xloops)
106{ 114{
107 int d0; 115 int d0;
108 116
109 xloops *= 4; 117 xloops *= 4;
110 __asm__("mull %0" 118 asm("mull %%edx"
111 :"=d" (xloops), "=&a" (d0) 119 :"=d" (xloops), "=&a" (d0)
112 :"1" (xloops), "0" 120 :"1" (xloops), "0"
113 (cpu_data(raw_smp_processor_id()).loops_per_jiffy * (HZ/4))); 121 (cpu_data(raw_smp_processor_id()).loops_per_jiffy * (HZ/4)));
114 122
115 __delay(++xloops); 123 __delay(++xloops);
116} 124}
125EXPORT_SYMBOL(__const_udelay);
117 126
118void __udelay(unsigned long usecs) 127void __udelay(unsigned long usecs)
119{ 128{
120 __const_udelay(usecs * 0x000010c7); /* 2**32 / 1000000 (rounded up) */ 129 __const_udelay(usecs * 0x000010c7); /* 2**32 / 1000000 (rounded up) */
121} 130}
131EXPORT_SYMBOL(__udelay);
122 132
123void __ndelay(unsigned long nsecs) 133void __ndelay(unsigned long nsecs)
124{ 134{
125 __const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */ 135 __const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */
126} 136}
127
128EXPORT_SYMBOL(__delay);
129EXPORT_SYMBOL(__const_udelay);
130EXPORT_SYMBOL(__udelay);
131EXPORT_SYMBOL(__ndelay); 137EXPORT_SYMBOL(__ndelay);
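(Worked example of the fixed-point conversion above, with hypothetical
numbers: 0x000010c7 = 4295 ~= 2^32 / 10^6. On a machine with HZ = 1000 and
loops_per_jiffy = 1000000 -- i.e. 10^9 delay loops per second --
__udelay(10) computes

	xloops = 10 * 4295 * 4                       = 171800
	loops  = (171800 * 1000000 * (1000/4)) >> 32 ~= 10000

about 10000 loops at 10^9 loops/s, the requested 10 us.)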
diff --git a/arch/x86/lib/delay_64.c b/arch/x86/lib/delay_64.c
deleted file mode 100644
index 4c441be92641..000000000000
--- a/arch/x86/lib/delay_64.c
+++ /dev/null
@@ -1,85 +0,0 @@
1/*
2 * Precise Delay Loops for x86-64
3 *
4 * Copyright (C) 1993 Linus Torvalds
5 * Copyright (C) 1997 Martin Mares <mj@atrey.karlin.mff.cuni.cz>
6 *
7 * The __delay function must _NOT_ be inlined as its execution time
8 * depends wildly on alignment on many x86 processors.
9 */
10
11#include <linux/module.h>
12#include <linux/sched.h>
13#include <linux/timex.h>
14#include <linux/preempt.h>
15#include <linux/delay.h>
16#include <linux/init.h>
17
18#include <asm/delay.h>
19#include <asm/msr.h>
20
21#ifdef CONFIG_SMP
22#include <asm/smp.h>
23#endif
24
25int __devinit read_current_timer(unsigned long *timer_value)
26{
27 rdtscll(*timer_value);
28 return 0;
29}
30
31void __delay(unsigned long loops)
32{
33 unsigned bclock, now;
34 int cpu;
35
36 preempt_disable();
37 cpu = smp_processor_id();
38 rdtscl(bclock);
39 for (;;) {
40 rdtscl(now);
41 if ((now - bclock) >= loops)
42 break;
43
44 /* Allow RT tasks to run */
45 preempt_enable();
46 rep_nop();
47 preempt_disable();
48
49 /*
50 * It is possible that we moved to another CPU, and
51 * since TSC's are per-cpu we need to calculate
52 * that. The delay must guarantee that we wait "at
53 * least" the amount of time. Being moved to another
54 * CPU could make the wait longer but we just need to
55 * make sure we waited long enough. Rebalance the
56 * counter for this CPU.
57 */
58 if (unlikely(cpu != smp_processor_id())) {
59 loops -= (now - bclock);
60 cpu = smp_processor_id();
61 rdtscl(bclock);
62 }
63 }
64 preempt_enable();
65}
66EXPORT_SYMBOL(__delay);
67
68inline void __const_udelay(unsigned long xloops)
69{
70 __delay(((xloops * HZ *
71 cpu_data(raw_smp_processor_id()).loops_per_jiffy) >> 32) + 1);
72}
73EXPORT_SYMBOL(__const_udelay);
74
75void __udelay(unsigned long usecs)
76{
77 __const_udelay(usecs * 0x000010c7); /* 2**32 / 1000000 (rounded up) */
78}
79EXPORT_SYMBOL(__udelay);
80
81void __ndelay(unsigned long nsecs)
82{
83 __const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */
84}
85EXPORT_SYMBOL(__ndelay);
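
The rebasing logic in the TSC loop above, as a user-space sketch (sched_getcpu(), __rdtsc() and _mm_pause() stand in for the kernel's smp_processor_id(), rdtscl() and rep_nop(); these are glibc/GCC assumptions, not kernel interfaces):

    #define _GNU_SOURCE
    #include <stdint.h>
    #include <sched.h>        /* sched_getcpu() (glibc) */
    #include <x86intrin.h>    /* __rdtsc(), _mm_pause() */

    static void tsc_delay(uint32_t loops)
    {
        uint32_t bclock = (uint32_t)__rdtsc();    /* like rdtscl() */
        int cpu = sched_getcpu();

        for (;;) {
            uint32_t now = (uint32_t)__rdtsc();
            if (now - bclock >= loops)
                break;
            _mm_pause();                          /* the rep_nop() above */
            if (cpu != sched_getcpu()) {          /* migrated to new CPU */
                loops -= now - bclock;            /* credit time waited */
                cpu = sched_getcpu();
                bclock = (uint32_t)__rdtsc();     /* rebase on new TSC */
            }
        }
    }
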
diff --git a/arch/x86/lib/getuser_64.S b/arch/x86/lib/getuser.S
index 5448876261f8..ad374003742f 100644
--- a/arch/x86/lib/getuser_64.S
+++ b/arch/x86/lib/getuser.S
@@ -3,6 +3,7 @@
3 * 3 *
4 * (C) Copyright 1998 Linus Torvalds 4 * (C) Copyright 1998 Linus Torvalds
5 * (C) Copyright 2005 Andi Kleen 5 * (C) Copyright 2005 Andi Kleen
6 * (C) Copyright 2008 Glauber Costa
6 * 7 *
7 * These functions have a non-standard call interface 8 * These functions have a non-standard call interface
8 * to make them more efficient, especially as they 9 * to make them more efficient, especially as they
@@ -13,14 +14,13 @@
13/* 14/*
14 * __get_user_X 15 * __get_user_X
15 * 16 *
16 * Inputs: %rcx contains the address. 17 * Inputs: %[r|e]ax contains the address.
17 * The register is modified, but all changes are undone 18 * The register is modified, but all changes are undone
18 * before returning because the C code doesn't know about it. 19 * before returning because the C code doesn't know about it.
19 * 20 *
20 * Outputs: %rax is error code (0 or -EFAULT) 21 * Outputs: %[r|e]ax is error code (0 or -EFAULT)
21 * %rdx contains zero-extended value 22 * %[r|e]dx contains zero-extended value
22 * 23 *
23 * %r8 is destroyed.
24 * 24 *
25 * These functions should not modify any other registers, 25 * These functions should not modify any other registers,
26 * as they get called from within inline assembly. 26 * as they get called from within inline assembly.
@@ -32,78 +32,73 @@
32#include <asm/errno.h> 32#include <asm/errno.h>
33#include <asm/asm-offsets.h> 33#include <asm/asm-offsets.h>
34#include <asm/thread_info.h> 34#include <asm/thread_info.h>
35#include <asm/asm.h>
35 36
36 .text 37 .text
37ENTRY(__get_user_1) 38ENTRY(__get_user_1)
38 CFI_STARTPROC 39 CFI_STARTPROC
39 GET_THREAD_INFO(%r8) 40 GET_THREAD_INFO(%_ASM_DX)
40 cmpq threadinfo_addr_limit(%r8),%rcx 41 cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
41 jae bad_get_user 42 jae bad_get_user
421: movzb (%rcx),%edx 431: movzb (%_ASM_AX),%edx
43 xorl %eax,%eax 44 xor %eax,%eax
44 ret 45 ret
45 CFI_ENDPROC 46 CFI_ENDPROC
46ENDPROC(__get_user_1) 47ENDPROC(__get_user_1)
47 48
48ENTRY(__get_user_2) 49ENTRY(__get_user_2)
49 CFI_STARTPROC 50 CFI_STARTPROC
50 GET_THREAD_INFO(%r8) 51 add $1,%_ASM_AX
51 addq $1,%rcx 52 jc bad_get_user
52 jc 20f 53 GET_THREAD_INFO(%_ASM_DX)
53 cmpq threadinfo_addr_limit(%r8),%rcx 54 cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
54 jae 20f 55 jae bad_get_user
55 decq %rcx 562: movzwl -1(%_ASM_AX),%edx
562: movzwl (%rcx),%edx 57 xor %eax,%eax
57 xorl %eax,%eax
58 ret 58 ret
5920: decq %rcx
60 jmp bad_get_user
61 CFI_ENDPROC 59 CFI_ENDPROC
62ENDPROC(__get_user_2) 60ENDPROC(__get_user_2)
63 61
64ENTRY(__get_user_4) 62ENTRY(__get_user_4)
65 CFI_STARTPROC 63 CFI_STARTPROC
66 GET_THREAD_INFO(%r8) 64 add $3,%_ASM_AX
67 addq $3,%rcx 65 jc bad_get_user
68 jc 30f 66 GET_THREAD_INFO(%_ASM_DX)
69 cmpq threadinfo_addr_limit(%r8),%rcx 67 cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
70 jae 30f 68 jae bad_get_user
71 subq $3,%rcx 693: mov -3(%_ASM_AX),%edx
723: movl (%rcx),%edx 70 xor %eax,%eax
73 xorl %eax,%eax
74 ret 71 ret
7530: subq $3,%rcx
76 jmp bad_get_user
77 CFI_ENDPROC 72 CFI_ENDPROC
78ENDPROC(__get_user_4) 73ENDPROC(__get_user_4)
79 74
75#ifdef CONFIG_X86_64
80ENTRY(__get_user_8) 76ENTRY(__get_user_8)
81 CFI_STARTPROC 77 CFI_STARTPROC
82 GET_THREAD_INFO(%r8) 78 add $7,%_ASM_AX
83 addq $7,%rcx 79 jc bad_get_user
84 jc 40f 80 GET_THREAD_INFO(%_ASM_DX)
85 cmpq threadinfo_addr_limit(%r8),%rcx 81 cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
86 jae 40f 82 jae bad_get_user
87 subq $7,%rcx 834: movq -7(%_ASM_AX),%_ASM_DX
884: movq (%rcx),%rdx 84 xor %eax,%eax
89 xorl %eax,%eax
90 ret 85 ret
9140: subq $7,%rcx
92 jmp bad_get_user
93 CFI_ENDPROC 86 CFI_ENDPROC
94ENDPROC(__get_user_8) 87ENDPROC(__get_user_8)
88#endif
95 89
96bad_get_user: 90bad_get_user:
97 CFI_STARTPROC 91 CFI_STARTPROC
98 xorl %edx,%edx 92 xor %edx,%edx
99 movq $(-EFAULT),%rax 93 mov $(-EFAULT),%_ASM_AX
100 ret 94 ret
101 CFI_ENDPROC 95 CFI_ENDPROC
102END(bad_get_user) 96END(bad_get_user)
103 97
104.section __ex_table,"a" 98.section __ex_table,"a"
105 .quad 1b,bad_get_user 99 _ASM_PTR 1b,bad_get_user
106 .quad 2b,bad_get_user 100 _ASM_PTR 2b,bad_get_user
107 .quad 3b,bad_get_user 101 _ASM_PTR 3b,bad_get_user
108 .quad 4b,bad_get_user 102#ifdef CONFIG_X86_64
109.previous 103 _ASM_PTR 4b,bad_get_user
104#endif
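
The range check all of the __get_user_N bodies now share, restated in C (a sketch of the same arithmetic the add/jc and cmp/jae pair performs):

    #include <errno.h>

    static int get_user_range_ok(unsigned long addr, unsigned long size,
                                 unsigned long addr_limit)
    {
        unsigned long end = addr + (size - 1);

        if (end < addr)           /* the "jc": address wrapped past 0 */
            return -EFAULT;
        if (end >= addr_limit)    /* the "jae": beyond the user limit */
            return -EFAULT;
        return 0;                 /* load proceeds via -(size-1)(addr) */
    }
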
diff --git a/arch/x86/lib/getuser_32.S b/arch/x86/lib/getuser_32.S
deleted file mode 100644
index 6d84b53f12a2..000000000000
--- a/arch/x86/lib/getuser_32.S
+++ /dev/null
@@ -1,78 +0,0 @@
1/*
2 * __get_user functions.
3 *
4 * (C) Copyright 1998 Linus Torvalds
5 *
6 * These functions have a non-standard call interface
7 * to make them more efficient, especially as they
8 * return an error value in addition to the "real"
9 * return value.
10 */
11#include <linux/linkage.h>
12#include <asm/dwarf2.h>
13#include <asm/thread_info.h>
14
15
16/*
17 * __get_user_X
18 *
19 * Inputs: %eax contains the address
20 *
21 * Outputs: %eax is error code (0 or -EFAULT)
22 * %edx contains zero-extended value
23 *
24 * These functions should not modify any other registers,
25 * as they get called from within inline assembly.
26 */
27
28.text
29ENTRY(__get_user_1)
30 CFI_STARTPROC
31 GET_THREAD_INFO(%edx)
32 cmpl TI_addr_limit(%edx),%eax
33 jae bad_get_user
341: movzbl (%eax),%edx
35 xorl %eax,%eax
36 ret
37 CFI_ENDPROC
38ENDPROC(__get_user_1)
39
40ENTRY(__get_user_2)
41 CFI_STARTPROC
42 addl $1,%eax
43 jc bad_get_user
44 GET_THREAD_INFO(%edx)
45 cmpl TI_addr_limit(%edx),%eax
46 jae bad_get_user
472: movzwl -1(%eax),%edx
48 xorl %eax,%eax
49 ret
50 CFI_ENDPROC
51ENDPROC(__get_user_2)
52
53ENTRY(__get_user_4)
54 CFI_STARTPROC
55 addl $3,%eax
56 jc bad_get_user
57 GET_THREAD_INFO(%edx)
58 cmpl TI_addr_limit(%edx),%eax
59 jae bad_get_user
603: movl -3(%eax),%edx
61 xorl %eax,%eax
62 ret
63 CFI_ENDPROC
64ENDPROC(__get_user_4)
65
66bad_get_user:
67 CFI_STARTPROC
68 xorl %edx,%edx
69 movl $-14,%eax
70 ret
71 CFI_ENDPROC
72END(bad_get_user)
73
74.section __ex_table,"a"
75 .long 1b,bad_get_user
76 .long 2b,bad_get_user
77 .long 3b,bad_get_user
78.previous
diff --git a/arch/x86/lib/putuser_32.S b/arch/x86/lib/putuser.S
index f58fba109d18..36b0d15ae6e9 100644
--- a/arch/x86/lib/putuser_32.S
+++ b/arch/x86/lib/putuser.S
@@ -2,6 +2,8 @@
2 * __put_user functions. 2 * __put_user functions.
3 * 3 *
4 * (C) Copyright 2005 Linus Torvalds 4 * (C) Copyright 2005 Linus Torvalds
5 * (C) Copyright 2005 Andi Kleen
6 * (C) Copyright 2008 Glauber Costa
5 * 7 *
6 * These functions have a non-standard call interface 8 * These functions have a non-standard call interface
7 * to make them more efficient, especially as they 9 * to make them more efficient, especially as they
@@ -11,6 +13,8 @@
11#include <linux/linkage.h> 13#include <linux/linkage.h>
12#include <asm/dwarf2.h> 14#include <asm/dwarf2.h>
13#include <asm/thread_info.h> 15#include <asm/thread_info.h>
16#include <asm/errno.h>
17#include <asm/asm.h>
14 18
15 19
16/* 20/*
@@ -26,73 +30,68 @@
26 */ 30 */
27 31
28#define ENTER CFI_STARTPROC ; \ 32#define ENTER CFI_STARTPROC ; \
29 pushl %ebx ; \ 33 GET_THREAD_INFO(%_ASM_BX)
30 CFI_ADJUST_CFA_OFFSET 4 ; \ 34#define EXIT ret ; \
31 CFI_REL_OFFSET ebx, 0 ; \
32 GET_THREAD_INFO(%ebx)
33#define EXIT popl %ebx ; \
34 CFI_ADJUST_CFA_OFFSET -4 ; \
35 CFI_RESTORE ebx ; \
36 ret ; \
37 CFI_ENDPROC 35 CFI_ENDPROC
38 36
39.text 37.text
40ENTRY(__put_user_1) 38ENTRY(__put_user_1)
41 ENTER 39 ENTER
42 cmpl TI_addr_limit(%ebx),%ecx 40 cmp TI_addr_limit(%_ASM_BX),%_ASM_CX
43 jae bad_put_user 41 jae bad_put_user
441: movb %al,(%ecx) 421: movb %al,(%_ASM_CX)
45 xorl %eax,%eax 43 xor %eax,%eax
46 EXIT 44 EXIT
47ENDPROC(__put_user_1) 45ENDPROC(__put_user_1)
48 46
49ENTRY(__put_user_2) 47ENTRY(__put_user_2)
50 ENTER 48 ENTER
51 movl TI_addr_limit(%ebx),%ebx 49 mov TI_addr_limit(%_ASM_BX),%_ASM_BX
52 subl $1,%ebx 50 sub $1,%_ASM_BX
53 cmpl %ebx,%ecx 51 cmp %_ASM_BX,%_ASM_CX
54 jae bad_put_user 52 jae bad_put_user
552: movw %ax,(%ecx) 532: movw %ax,(%_ASM_CX)
56 xorl %eax,%eax 54 xor %eax,%eax
57 EXIT 55 EXIT
58ENDPROC(__put_user_2) 56ENDPROC(__put_user_2)
59 57
60ENTRY(__put_user_4) 58ENTRY(__put_user_4)
61 ENTER 59 ENTER
62 movl TI_addr_limit(%ebx),%ebx 60 mov TI_addr_limit(%_ASM_BX),%_ASM_BX
63 subl $3,%ebx 61 sub $3,%_ASM_BX
64 cmpl %ebx,%ecx 62 cmp %_ASM_BX,%_ASM_CX
65 jae bad_put_user 63 jae bad_put_user
663: movl %eax,(%ecx) 643: movl %eax,(%_ASM_CX)
67 xorl %eax,%eax 65 xor %eax,%eax
68 EXIT 66 EXIT
69ENDPROC(__put_user_4) 67ENDPROC(__put_user_4)
70 68
71ENTRY(__put_user_8) 69ENTRY(__put_user_8)
72 ENTER 70 ENTER
73 movl TI_addr_limit(%ebx),%ebx 71 mov TI_addr_limit(%_ASM_BX),%_ASM_BX
74 subl $7,%ebx 72 sub $7,%_ASM_BX
75 cmpl %ebx,%ecx 73 cmp %_ASM_BX,%_ASM_CX
76 jae bad_put_user 74 jae bad_put_user
774: movl %eax,(%ecx) 754: mov %_ASM_AX,(%_ASM_CX)
785: movl %edx,4(%ecx) 76#ifdef CONFIG_X86_32
79 xorl %eax,%eax 775: movl %edx,4(%_ASM_CX)
78#endif
79 xor %eax,%eax
80 EXIT 80 EXIT
81ENDPROC(__put_user_8) 81ENDPROC(__put_user_8)
82 82
83bad_put_user: 83bad_put_user:
84 CFI_STARTPROC simple 84 CFI_STARTPROC
85 CFI_DEF_CFA esp, 2*4 85 movl $-EFAULT,%eax
86 CFI_OFFSET eip, -1*4
87 CFI_OFFSET ebx, -2*4
88 movl $-14,%eax
89 EXIT 86 EXIT
90END(bad_put_user) 87END(bad_put_user)
91 88
92.section __ex_table,"a" 89.section __ex_table,"a"
93 .long 1b,bad_put_user 90 _ASM_PTR 1b,bad_put_user
94 .long 2b,bad_put_user 91 _ASM_PTR 2b,bad_put_user
95 .long 3b,bad_put_user 92 _ASM_PTR 3b,bad_put_user
96 .long 4b,bad_put_user 93 _ASM_PTR 4b,bad_put_user
97 .long 5b,bad_put_user 94#ifdef CONFIG_X86_32
95 _ASM_PTR 5b,bad_put_user
96#endif
98.previous 97.previous
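
__put_user_N does the equivalent check from the limit side -- shrink addr_limit by size-1 instead of growing the address, which needs no wrap check because addr_limit sits well below the top of the address space. As a sketch:

    #include <errno.h>

    static int put_user_range_ok(unsigned long addr, unsigned long size,
                                 unsigned long addr_limit)
    {
        /* no wrap check needed: addr_limit - (size-1) cannot
         * underflow for any sane limit */
        return (addr >= addr_limit - (size - 1)) ? -EFAULT : 0;
    }
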
diff --git a/arch/x86/lib/putuser_64.S b/arch/x86/lib/putuser_64.S
deleted file mode 100644
index 4989f5a8fa9b..000000000000
--- a/arch/x86/lib/putuser_64.S
+++ /dev/null
@@ -1,106 +0,0 @@
1/*
2 * __put_user functions.
3 *
4 * (C) Copyright 1998 Linus Torvalds
5 * (C) Copyright 2005 Andi Kleen
6 *
7 * These functions have a non-standard call interface
8 * to make them more efficient, especially as they
9 * return an error value in addition to the "real"
10 * return value.
11 */
12
13/*
14 * __put_user_X
15 *
16 * Inputs: %rcx contains the address
17 * %rdx contains new value
18 *
19 * Outputs: %rax is error code (0 or -EFAULT)
20 *
21 * %r8 is destroyed.
22 *
23 * These functions should not modify any other registers,
24 * as they get called from within inline assembly.
25 */
26
27#include <linux/linkage.h>
28#include <asm/dwarf2.h>
29#include <asm/page.h>
30#include <asm/errno.h>
31#include <asm/asm-offsets.h>
32#include <asm/thread_info.h>
33
34 .text
35ENTRY(__put_user_1)
36 CFI_STARTPROC
37 GET_THREAD_INFO(%r8)
38 cmpq threadinfo_addr_limit(%r8),%rcx
39 jae bad_put_user
401: movb %dl,(%rcx)
41 xorl %eax,%eax
42 ret
43 CFI_ENDPROC
44ENDPROC(__put_user_1)
45
46ENTRY(__put_user_2)
47 CFI_STARTPROC
48 GET_THREAD_INFO(%r8)
49 addq $1,%rcx
50 jc 20f
51 cmpq threadinfo_addr_limit(%r8),%rcx
52 jae 20f
53 decq %rcx
542: movw %dx,(%rcx)
55 xorl %eax,%eax
56 ret
5720: decq %rcx
58 jmp bad_put_user
59 CFI_ENDPROC
60ENDPROC(__put_user_2)
61
62ENTRY(__put_user_4)
63 CFI_STARTPROC
64 GET_THREAD_INFO(%r8)
65 addq $3,%rcx
66 jc 30f
67 cmpq threadinfo_addr_limit(%r8),%rcx
68 jae 30f
69 subq $3,%rcx
703: movl %edx,(%rcx)
71 xorl %eax,%eax
72 ret
7330: subq $3,%rcx
74 jmp bad_put_user
75 CFI_ENDPROC
76ENDPROC(__put_user_4)
77
78ENTRY(__put_user_8)
79 CFI_STARTPROC
80 GET_THREAD_INFO(%r8)
81 addq $7,%rcx
82 jc 40f
83 cmpq threadinfo_addr_limit(%r8),%rcx
84 jae 40f
85 subq $7,%rcx
864: movq %rdx,(%rcx)
87 xorl %eax,%eax
88 ret
8940: subq $7,%rcx
90 jmp bad_put_user
91 CFI_ENDPROC
92ENDPROC(__put_user_8)
93
94bad_put_user:
95 CFI_STARTPROC
96 movq $(-EFAULT),%rax
97 ret
98 CFI_ENDPROC
99END(bad_put_user)
100
101.section __ex_table,"a"
102 .quad 1b,bad_put_user
103 .quad 2b,bad_put_user
104 .quad 3b,bad_put_user
105 .quad 4b,bad_put_user
106.previous
diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c
index 0c89d1bb0287..f4df6e7c718b 100644
--- a/arch/x86/lib/usercopy_64.c
+++ b/arch/x86/lib/usercopy_64.c
@@ -158,3 +158,26 @@ unsigned long copy_in_user(void __user *to, const void __user *from, unsigned le
158} 158}
159EXPORT_SYMBOL(copy_in_user); 159EXPORT_SYMBOL(copy_in_user);
160 160
161/*
162 * Try to copy last bytes and clear the rest if needed.
163 * Since protection fault in copy_from/to_user is not a normal situation,
164 * it is not necessary to optimize tail handling.
165 */
166unsigned long
167copy_user_handle_tail(char *to, char *from, unsigned len, unsigned zerorest)
168{
169 char c;
170 unsigned zero_len;
171
172 for (; len; --len) {
173 if (__get_user_nocheck(c, from++, sizeof(char)))
174 break;
175 if (__put_user_nocheck(c, to++, sizeof(char)))
176 break;
177 }
178
179 for (c = 0, zero_len = len; zerorest && zero_len; --zero_len)
180 if (__put_user_nocheck(c, to++, sizeof(char)))
181 break;
182 return len;
183}
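
A hypothetical call site, to show the return convention (the real callers are the assembly copy routines; "copied" here is assumed to come from the faulting bulk copy):

    unsigned long copy_user_handle_tail(char *to, char *from,
                                        unsigned len, unsigned zerorest);

    static unsigned long
    finish_copy_from_user(char *to, char *from, unsigned len, unsigned copied)
    {
        if (copied == len)
            return 0;                     /* nothing left over */
        /* zerorest=1: clear whatever the retry cannot fetch either,
         * so the kernel buffer never keeps stale bytes */
        return copy_user_handle_tail(to + copied, from + copied,
                                     len - copied, 1);
    }
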
diff --git a/arch/x86/mach-default/setup.c b/arch/x86/mach-default/setup.c
index 0c28a071824c..48278fa7d3de 100644
--- a/arch/x86/mach-default/setup.c
+++ b/arch/x86/mach-default/setup.c
@@ -10,6 +10,14 @@
10#include <asm/e820.h> 10#include <asm/e820.h>
11#include <asm/setup.h> 11#include <asm/setup.h>
12 12
13/*
14 * Any quirks to be performed to initialize timers/irqs/etc?
15 */
16int (*arch_time_init_quirk)(void);
17int (*arch_pre_intr_init_quirk)(void);
18int (*arch_intr_init_quirk)(void);
19int (*arch_trap_init_quirk)(void);
20
13#ifdef CONFIG_HOTPLUG_CPU 21#ifdef CONFIG_HOTPLUG_CPU
14#define DEFAULT_SEND_IPI (1) 22#define DEFAULT_SEND_IPI (1)
15#else 23#else
@@ -29,6 +37,10 @@ int no_broadcast=DEFAULT_SEND_IPI;
29 **/ 37 **/
30void __init pre_intr_init_hook(void) 38void __init pre_intr_init_hook(void)
31{ 39{
40 if (arch_pre_intr_init_quirk) {
41 if (arch_pre_intr_init_quirk())
42 return;
43 }
32 init_ISA_irqs(); 44 init_ISA_irqs();
33} 45}
34 46
@@ -52,6 +64,10 @@ static struct irqaction irq2 = {
52 **/ 64 **/
53void __init intr_init_hook(void) 65void __init intr_init_hook(void)
54{ 66{
67 if (arch_intr_init_quirk) {
68 if (arch_intr_init_quirk())
69 return;
70 }
55#ifdef CONFIG_X86_LOCAL_APIC 71#ifdef CONFIG_X86_LOCAL_APIC
56 apic_intr_init(); 72 apic_intr_init();
57#endif 73#endif
@@ -65,7 +81,7 @@ void __init intr_init_hook(void)
65 * 81 *
66 * Description: 82 * Description:
67 * generally used to activate any machine specific identification 83 * generally used to activate any machine specific identification
68 * routines that may be needed before setup_arch() runs. On VISWS 84 * routines that may be needed before setup_arch() runs. On Voyager
69 * this is used to get the board revision and type. 85 * this is used to get the board revision and type.
70 **/ 86 **/
71void __init pre_setup_arch_hook(void) 87void __init pre_setup_arch_hook(void)
@@ -81,6 +97,10 @@ void __init pre_setup_arch_hook(void)
81 **/ 97 **/
82void __init trap_init_hook(void) 98void __init trap_init_hook(void)
83{ 99{
100 if (arch_trap_init_quirk) {
101 if (arch_trap_init_quirk())
102 return;
103 }
84} 104}
85 105
86static struct irqaction irq0 = { 106static struct irqaction irq0 = {
@@ -99,6 +119,16 @@ static struct irqaction irq0 = {
99 **/ 119 **/
100void __init time_init_hook(void) 120void __init time_init_hook(void)
101{ 121{
122 if (arch_time_init_quirk) {
123 /*
124 * A nonzero return code does not mean failure, it means
125 * that the architecture quirk does not want any
126 * generic (timer) setup to be performed after this:
127 */
128 if (arch_time_init_quirk())
129 return;
130 }
131
102 irq0.mask = cpumask_of_cpu(0); 132 irq0.mask = cpumask_of_cpu(0);
103 setup_irq(0, &irq0); 133 setup_irq(0, &irq0);
104} 134}
@@ -142,45 +172,3 @@ static int __init print_ipi_mode(void)
142 172
143late_initcall(print_ipi_mode); 173late_initcall(print_ipi_mode);
144 174
145/**
146 * machine_specific_memory_setup - Hook for machine specific memory setup.
147 *
148 * Description:
149 * This is included late in kernel/setup.c so that it can make
150 * use of all of the static functions.
151 **/
152
153char * __init machine_specific_memory_setup(void)
154{
155 char *who;
156
157
158 who = "BIOS-e820";
159
160 /*
161 * Try to copy the BIOS-supplied E820-map.
162 *
163 * Otherwise fake a memory map; one section from 0k->640k,
164 * the next section from 1mb->appropriate_mem_k
165 */
166 sanitize_e820_map(boot_params.e820_map, &boot_params.e820_entries);
167 if (copy_e820_map(boot_params.e820_map, boot_params.e820_entries)
168 < 0) {
169 unsigned long mem_size;
170
171 /* compare results from other methods and take the greater */
172 if (boot_params.alt_mem_k
173 < boot_params.screen_info.ext_mem_k) {
174 mem_size = boot_params.screen_info.ext_mem_k;
175 who = "BIOS-88";
176 } else {
177 mem_size = boot_params.alt_mem_k;
178 who = "BIOS-e801";
179 }
180
181 e820.nr_map = 0;
182 add_memory_region(0, LOWMEMSIZE(), E820_RAM);
183 add_memory_region(HIGH_MEMORY, mem_size << 10, E820_RAM);
184 }
185 return who;
186}
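
A subarch would use these hooks roughly as follows (the visws_* names are illustrative; the convention is the one stated in the comment above -- a nonzero return means the generic setup is skipped):

    extern int (*arch_time_init_quirk)(void);   /* declared above */

    static int visws_time_init_quirk_fn(void)   /* name illustrative */
    {
        /* machine-specific timer bring-up would go here */
        return 1;    /* nonzero: skip the generic irq0/timer setup */
    }

    static void visws_register_quirks(void)
    {
        arch_time_init_quirk = visws_time_init_quirk_fn;
    }
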
diff --git a/arch/x86/mach-es7000/Makefile b/arch/x86/mach-es7000/Makefile
index 69dd4da218dc..3ef8b43b62fc 100644
--- a/arch/x86/mach-es7000/Makefile
+++ b/arch/x86/mach-es7000/Makefile
@@ -3,4 +3,3 @@
3# 3#
4 4
5obj-$(CONFIG_X86_ES7000) := es7000plat.o 5obj-$(CONFIG_X86_ES7000) := es7000plat.o
6obj-$(CONFIG_X86_GENERICARCH) := es7000plat.o
diff --git a/arch/x86/mach-es7000/es7000plat.c b/arch/x86/mach-es7000/es7000plat.c
index f5d6f7d8b86e..4354ce804889 100644
--- a/arch/x86/mach-es7000/es7000plat.c
+++ b/arch/x86/mach-es7000/es7000plat.c
@@ -52,6 +52,8 @@ static struct mip_reg *host_reg;
52static int mip_port; 52static int mip_port;
53static unsigned long mip_addr, host_addr; 53static unsigned long mip_addr, host_addr;
54 54
55int es7000_plat;
56
55/* 57/*
56 * GSI override for ES7000 platforms. 58 * GSI override for ES7000 platforms.
57 */ 59 */
@@ -175,53 +177,6 @@ find_unisys_acpi_oem_table(unsigned long *oem_addr)
175} 177}
176#endif 178#endif
177 179
178/*
179 * This file also gets compiled if CONFIG_X86_GENERICARCH is set. Generic
180 * arch already has the following function definitions (asm-generic/es7000.c),
181 * hence there is no need to define them for that case.
182 */
183#ifndef CONFIG_X86_GENERICARCH
184void es7000_sw_apic(void);
185void __init enable_apic_mode(void)
186{
187 es7000_sw_apic();
188 return;
189}
190
191__init int mps_oem_check(struct mp_config_table *mpc, char *oem,
192 char *productid)
193{
194 if (mpc->mpc_oemptr) {
195 struct mp_config_oemtable *oem_table =
196 (struct mp_config_oemtable *)mpc->mpc_oemptr;
197 if (!strncmp(oem, "UNISYS", 6))
198 return parse_unisys_oem((char *)oem_table);
199 }
200 return 0;
201}
202#ifdef CONFIG_ACPI
203/* Hook from generic ACPI tables.c */
204int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id)
205{
206 unsigned long oem_addr;
207 if (!find_unisys_acpi_oem_table(&oem_addr)) {
208 if (es7000_check_dsdt())
209 return parse_unisys_oem((char *)oem_addr);
210 else {
211 setup_unisys();
212 return 1;
213 }
214 }
215 return 0;
216}
217#else
218int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id)
219{
220 return 0;
221}
222#endif
223#endif /* CONFIG_X86_GENERICARCH */

224
225static void 180static void
226es7000_spin(int n) 181es7000_spin(int n)
227{ 182{
diff --git a/arch/x86/mach-generic/Makefile b/arch/x86/mach-generic/Makefile
index 19d6d407737b..0dbd7803a1d5 100644
--- a/arch/x86/mach-generic/Makefile
+++ b/arch/x86/mach-generic/Makefile
@@ -2,7 +2,11 @@
2# Makefile for the generic architecture 2# Makefile for the generic architecture
3# 3#
4 4
5EXTRA_CFLAGS := -Iarch/x86/kernel 5EXTRA_CFLAGS := -Iarch/x86/kernel
6 6
7obj-y := probe.o summit.o bigsmp.o es7000.o default.o 7obj-y := probe.o default.o
8obj-y += ../../x86/mach-es7000/ 8obj-$(CONFIG_X86_NUMAQ) += numaq.o
9obj-$(CONFIG_X86_SUMMIT) += summit.o
10obj-$(CONFIG_X86_BIGSMP) += bigsmp.o
11obj-$(CONFIG_X86_ES7000) += es7000.o
12obj-$(CONFIG_X86_ES7000) += ../../x86/mach-es7000/
diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c
index 95fc463056d0..59d771714559 100644
--- a/arch/x86/mach-generic/bigsmp.c
+++ b/arch/x86/mach-generic/bigsmp.c
@@ -23,10 +23,8 @@ static int dmi_bigsmp; /* can be set by dmi scanners */
23 23
24static int hp_ht_bigsmp(const struct dmi_system_id *d) 24static int hp_ht_bigsmp(const struct dmi_system_id *d)
25{ 25{
26#ifdef CONFIG_X86_GENERICARCH
27 printk(KERN_NOTICE "%s detected: force use of apic=bigsmp\n", d->ident); 26 printk(KERN_NOTICE "%s detected: force use of apic=bigsmp\n", d->ident);
28 dmi_bigsmp = 1; 27 dmi_bigsmp = 1;
29#endif
30 return 0; 28 return 0;
31} 29}
32 30
@@ -48,7 +46,7 @@ static const struct dmi_system_id bigsmp_dmi_table[] = {
48static int probe_bigsmp(void) 46static int probe_bigsmp(void)
49{ 47{
50 if (def_to_bigsmp) 48 if (def_to_bigsmp)
51 dmi_bigsmp = 1; 49 dmi_bigsmp = 1;
52 else 50 else
53 dmi_check_system(bigsmp_dmi_table); 51 dmi_check_system(bigsmp_dmi_table);
54 return dmi_bigsmp; 52 return dmi_bigsmp;
diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c
new file mode 100644
index 000000000000..8091e68764c4
--- /dev/null
+++ b/arch/x86/mach-generic/numaq.c
@@ -0,0 +1,41 @@
1/*
2 * APIC driver for the IBM NUMAQ chipset.
3 */
4#define APIC_DEFINITION 1
5#include <linux/threads.h>
6#include <linux/cpumask.h>
7#include <linux/smp.h>
8#include <asm/mpspec.h>
9#include <asm/genapic.h>
10#include <asm/fixmap.h>
11#include <asm/apicdef.h>
12#include <linux/kernel.h>
13#include <linux/string.h>
14#include <linux/init.h>
15#include <asm/mach-numaq/mach_apic.h>
16#include <asm/mach-numaq/mach_apicdef.h>
17#include <asm/mach-numaq/mach_ipi.h>
18#include <asm/mach-numaq/mach_mpparse.h>
19#include <asm/mach-numaq/mach_wakecpu.h>
20#include <asm/numaq.h>
21
22static int mps_oem_check(struct mp_config_table *mpc, char *oem,
23 char *productid)
24{
25 numaq_mps_oem_check(mpc, oem, productid);
26 return found_numaq;
27}
28
29static int probe_numaq(void)
30{
31 /* already know from get_memcfg_numaq() */
32 return found_numaq;
33}
34
35/* Hook from generic ACPI tables.c */
36static int acpi_madt_oem_check(char *oem_id, char *oem_table_id)
37{
38 return 0;
39}
40
41struct genapic apic_numaq = APIC_INIT("NUMAQ", probe_numaq);
diff --git a/arch/x86/mach-generic/probe.c b/arch/x86/mach-generic/probe.c
index c5ae751b994a..5a7e4619e1c4 100644
--- a/arch/x86/mach-generic/probe.c
+++ b/arch/x86/mach-generic/probe.c
@@ -16,6 +16,7 @@
16#include <asm/apicdef.h> 16#include <asm/apicdef.h>
17#include <asm/genapic.h> 17#include <asm/genapic.h>
18 18
19extern struct genapic apic_numaq;
19extern struct genapic apic_summit; 20extern struct genapic apic_summit;
20extern struct genapic apic_bigsmp; 21extern struct genapic apic_bigsmp;
21extern struct genapic apic_es7000; 22extern struct genapic apic_es7000;
@@ -24,9 +25,18 @@ extern struct genapic apic_default;
24struct genapic *genapic = &apic_default; 25struct genapic *genapic = &apic_default;
25 26
26static struct genapic *apic_probe[] __initdata = { 27static struct genapic *apic_probe[] __initdata = {
28#ifdef CONFIG_X86_NUMAQ
29 &apic_numaq,
30#endif
31#ifdef CONFIG_X86_SUMMIT
27 &apic_summit, 32 &apic_summit,
33#endif
34#ifdef CONFIG_X86_BIGSMP
28 &apic_bigsmp, 35 &apic_bigsmp,
36#endif
37#ifdef CONFIG_X86_ES7000
29 &apic_es7000, 38 &apic_es7000,
39#endif
30 &apic_default, /* must be last */ 40 &apic_default, /* must be last */
31 NULL, 41 NULL,
32}; 42};
@@ -54,6 +64,7 @@ early_param("apic", parse_apic);
54 64
55void __init generic_bigsmp_probe(void) 65void __init generic_bigsmp_probe(void)
56{ 66{
67#ifdef CONFIG_X86_BIGSMP
57 /* 68 /*
58 * This routine is used to switch to bigsmp mode when 69 * This routine is used to switch to bigsmp mode when
59 * - There is no apic= option specified by the user 70 * - There is no apic= option specified by the user
@@ -67,6 +78,7 @@ void __init generic_bigsmp_probe(void)
67 printk(KERN_INFO "Overriding APIC driver with %s\n", 78 printk(KERN_INFO "Overriding APIC driver with %s\n",
68 genapic->name); 79 genapic->name);
69 } 80 }
81#endif
70} 82}
71 83
72void __init generic_apic_probe(void) 84void __init generic_apic_probe(void)
@@ -88,7 +100,8 @@ void __init generic_apic_probe(void)
88 100
89/* These functions can switch the APIC even after the initial ->probe() */ 101/* These functions can switch the APIC even after the initial ->probe() */
90 102
91int __init mps_oem_check(struct mp_config_table *mpc, char *oem, char *productid) 103int __init mps_oem_check(struct mp_config_table *mpc, char *oem,
104 char *productid)
92{ 105{
93 int i; 106 int i;
94 for (i = 0; apic_probe[i]; ++i) { 107 for (i = 0; apic_probe[i]; ++i) {
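
For context, the selection this table feeds: generic_apic_probe() (not shown in full here) essentially walks the array and installs the first driver whose ->probe() claims the machine, which is why apic_default must stay last. Simplified sketch:

    static void pick_genapic(void)
    {
        int i;

        for (i = 0; apic_probe[i]; i++) {
            if (apic_probe[i]->probe()) {
                genapic = apic_probe[i];
                break;     /* first match wins; apic_default is last */
            }
        }
    }
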
diff --git a/arch/x86/mach-visws/Makefile b/arch/x86/mach-visws/Makefile
deleted file mode 100644
index 835fd96ad768..000000000000
--- a/arch/x86/mach-visws/Makefile
+++ /dev/null
@@ -1,8 +0,0 @@
1#
2# Makefile for the linux kernel.
3#
4
5obj-y := setup.o traps.o reboot.o
6
7obj-$(CONFIG_X86_VISWS_APIC) += visws_apic.o
8obj-$(CONFIG_X86_LOCAL_APIC) += mpparse.o
diff --git a/arch/x86/mach-visws/mpparse.c b/arch/x86/mach-visws/mpparse.c
deleted file mode 100644
index 57484e91ab90..000000000000
--- a/arch/x86/mach-visws/mpparse.c
+++ /dev/null
@@ -1,88 +0,0 @@
1
2#include <linux/init.h>
3#include <linux/smp.h>
4
5#include <asm/smp.h>
6#include <asm/io.h>
7
8#include "cobalt.h"
9#include "mach_apic.h"
10
11/* Have we found an MP table */
12int smp_found_config;
13
14int pic_mode;
15
16extern unsigned int __cpuinitdata maxcpus;
17
18/*
19 * The Visual Workstation is Intel MP compliant in the hardware
20 * sense, but it doesn't have a BIOS(-configuration table).
21 * No problem for Linux.
22 */
23
24static void __init MP_processor_info (struct mpc_config_processor *m)
25{
26 int ver, logical_apicid;
27 physid_mask_t apic_cpus;
28
29 if (!(m->mpc_cpuflag & CPU_ENABLED))
30 return;
31
32 logical_apicid = m->mpc_apicid;
33 printk(KERN_INFO "%sCPU #%d %u:%u APIC version %d\n",
34 m->mpc_cpuflag & CPU_BOOTPROCESSOR ? "Bootup " : "",
35 m->mpc_apicid,
36 (m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8,
37 (m->mpc_cpufeature & CPU_MODEL_MASK) >> 4,
38 m->mpc_apicver);
39
40 if (m->mpc_cpuflag & CPU_BOOTPROCESSOR)
41 boot_cpu_physical_apicid = m->mpc_apicid;
42
43 ver = m->mpc_apicver;
44 if ((ver >= 0x14 && m->mpc_apicid >= 0xff) || m->mpc_apicid >= 0xf) {
45 printk(KERN_ERR "Processor #%d INVALID. (Max ID: %d).\n",
46 m->mpc_apicid, MAX_APICS);
47 return;
48 }
49
50 apic_cpus = apicid_to_cpu_present(m->mpc_apicid);
51 physids_or(phys_cpu_present_map, phys_cpu_present_map, apic_cpus);
52 /*
53 * Validate version
54 */
55 if (ver == 0x0) {
56 printk(KERN_ERR "BIOS bug, APIC version is 0 for CPU#%d! "
57 "fixing up to 0x10. (tell your hw vendor)\n",
58 m->mpc_apicid);
59 ver = 0x10;
60 }
61 apic_version[m->mpc_apicid] = ver;
62}
63
64void __init find_smp_config(void)
65{
66 struct mpc_config_processor *mp = phys_to_virt(CO_CPU_TAB_PHYS);
67 unsigned short ncpus = readw(phys_to_virt(CO_CPU_NUM_PHYS));
68
69 if (ncpus > CO_CPU_MAX) {
70 printk(KERN_WARNING "find_visws_smp: got cpu count of %d at %p\n",
71 ncpus, mp);
72
73 ncpus = CO_CPU_MAX;
74 }
75
76 if (ncpus > maxcpus)
77 ncpus = maxcpus;
78
79 smp_found_config = 1;
80 while (ncpus--)
81 MP_processor_info(mp++);
82
83 mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
84}
85
86void __init get_smp_config (void)
87{
88}
diff --git a/arch/x86/mach-visws/reboot.c b/arch/x86/mach-visws/reboot.c
deleted file mode 100644
index 99332abfad42..000000000000
--- a/arch/x86/mach-visws/reboot.c
+++ /dev/null
@@ -1,55 +0,0 @@
1#include <linux/module.h>
2#include <linux/smp.h>
3#include <linux/delay.h>
4
5#include <asm/io.h>
6#include "piix4.h"
7
8void (*pm_power_off)(void);
9EXPORT_SYMBOL(pm_power_off);
10
11void machine_shutdown(void)
12{
13#ifdef CONFIG_SMP
14 smp_send_stop();
15#endif
16}
17
18void machine_emergency_restart(void)
19{
20 /*
21 * Visual Workstations restart after this
22 * register is poked on the PIIX4
23 */
24 outb(PIIX4_RESET_VAL, PIIX4_RESET_PORT);
25}
26
27void machine_restart(char * __unused)
28{
29 machine_shutdown();
30 machine_emergency_restart();
31}
32
33void machine_power_off(void)
34{
35 unsigned short pm_status;
36 extern unsigned int pci_bus0;
37
38 while ((pm_status = inw(PMSTS_PORT)) & 0x100)
39 outw(pm_status, PMSTS_PORT);
40
41 outw(PM_SUSPEND_ENABLE, PMCNTRL_PORT);
42
43 mdelay(10);
44
45#define PCI_CONF1_ADDRESS(bus, devfn, reg) \
46 (0x80000000 | (bus << 16) | (devfn << 8) | (reg & ~3))
47
48 outl(PCI_CONF1_ADDRESS(pci_bus0, SPECIAL_DEV, SPECIAL_REG), 0xCF8);
49 outl(PIIX_SPECIAL_STOP, 0xCFC);
50}
51
52void machine_halt(void)
53{
54}
55
diff --git a/arch/x86/mach-visws/setup.c b/arch/x86/mach-visws/setup.c
deleted file mode 100644
index de4c9dbd086f..000000000000
--- a/arch/x86/mach-visws/setup.c
+++ /dev/null
@@ -1,183 +0,0 @@
1/*
2 * Unmaintained SGI Visual Workstation support.
3 * Split out from setup.c by davej@suse.de
4 */
5
6#include <linux/smp.h>
7#include <linux/init.h>
8#include <linux/interrupt.h>
9#include <linux/module.h>
10
11#include <asm/fixmap.h>
12#include <asm/arch_hooks.h>
13#include <asm/io.h>
14#include <asm/e820.h>
15#include <asm/setup.h>
16#include "cobalt.h"
17#include "piix4.h"
18
19int no_broadcast;
20
21char visws_board_type = -1;
22char visws_board_rev = -1;
23
24void __init visws_get_board_type_and_rev(void)
25{
26 int raw;
27
28 visws_board_type = (char)(inb_p(PIIX_GPI_BD_REG) & PIIX_GPI_BD_REG)
29 >> PIIX_GPI_BD_SHIFT;
30 /*
31 * Get Board rev.
32 * First, we have to initialize the 307 part to allow us access
33 * to the GPIO registers. Let's map them at 0x0fc0 which is right
34 * after the PIIX4 PM section.
35 */
36 outb_p(SIO_DEV_SEL, SIO_INDEX);
37 outb_p(SIO_GP_DEV, SIO_DATA); /* Talk to GPIO regs. */
38
39 outb_p(SIO_DEV_MSB, SIO_INDEX);
40 outb_p(SIO_GP_MSB, SIO_DATA); /* MSB of GPIO base address */
41
42 outb_p(SIO_DEV_LSB, SIO_INDEX);
43 outb_p(SIO_GP_LSB, SIO_DATA); /* LSB of GPIO base address */
44
45 outb_p(SIO_DEV_ENB, SIO_INDEX);
46 outb_p(1, SIO_DATA); /* Enable GPIO registers. */
47
48 /*
49 * Now, we have to map the power management section to write
50 * a bit which enables access to the GPIO registers.
51 * What lunatic came up with this shit?
52 */
53 outb_p(SIO_DEV_SEL, SIO_INDEX);
54 outb_p(SIO_PM_DEV, SIO_DATA); /* Talk to GPIO regs. */
55
56 outb_p(SIO_DEV_MSB, SIO_INDEX);
57 outb_p(SIO_PM_MSB, SIO_DATA); /* MSB of PM base address */
58
59 outb_p(SIO_DEV_LSB, SIO_INDEX);
60 outb_p(SIO_PM_LSB, SIO_DATA); /* LSB of PM base address */
61
62 outb_p(SIO_DEV_ENB, SIO_INDEX);
63 outb_p(1, SIO_DATA); /* Enable PM registers. */
64
65 /*
66 * Now, write the PM register which enables the GPIO registers.
67 */
68 outb_p(SIO_PM_FER2, SIO_PM_INDEX);
69 outb_p(SIO_PM_GP_EN, SIO_PM_DATA);
70
71 /*
72 * Now, initialize the GPIO registers.
73 * We want them all to be inputs which is the
74 * power on default, so let's leave them alone.
75 * So, let's just read the board rev!
76 */
77 raw = inb_p(SIO_GP_DATA1);
78 raw &= 0x7f; /* 7 bits of valid board revision ID. */
79
80 if (visws_board_type == VISWS_320) {
81 if (raw < 0x6) {
82 visws_board_rev = 4;
83 } else if (raw < 0xc) {
84 visws_board_rev = 5;
85 } else {
86 visws_board_rev = 6;
87 }
88 } else if (visws_board_type == VISWS_540) {
89 visws_board_rev = 2;
90 } else {
91 visws_board_rev = raw;
92 }
93
94 printk(KERN_INFO "Silicon Graphics Visual Workstation %s (rev %d) detected\n",
95 (visws_board_type == VISWS_320 ? "320" :
96 (visws_board_type == VISWS_540 ? "540" :
97 "unknown")), visws_board_rev);
98}
99
100void __init pre_intr_init_hook(void)
101{
102 init_VISWS_APIC_irqs();
103}
104
105void __init intr_init_hook(void)
106{
107#ifdef CONFIG_X86_LOCAL_APIC
108 apic_intr_init();
109#endif
110}
111
112void __init pre_setup_arch_hook()
113{
114 visws_get_board_type_and_rev();
115}
116
117static struct irqaction irq0 = {
118 .handler = timer_interrupt,
119 .flags = IRQF_DISABLED | IRQF_IRQPOLL,
120 .name = "timer",
121};
122
123void __init time_init_hook(void)
124{
125 printk(KERN_INFO "Starting Cobalt Timer system clock\n");
126
127 /* Set the countdown value */
128 co_cpu_write(CO_CPU_TIMEVAL, CO_TIME_HZ/HZ);
129
130 /* Start the timer */
131 co_cpu_write(CO_CPU_CTRL, co_cpu_read(CO_CPU_CTRL) | CO_CTRL_TIMERUN);
132
133 /* Enable (unmask) the timer interrupt */
134 co_cpu_write(CO_CPU_CTRL, co_cpu_read(CO_CPU_CTRL) & ~CO_CTRL_TIMEMASK);
135
136 /* Wire cpu IDT entry to s/w handler (and Cobalt APIC to IDT) */
137 setup_irq(0, &irq0);
138}
139
140/* Hook for machine specific memory setup. */
141
142#define MB (1024 * 1024)
143
144unsigned long sgivwfb_mem_phys;
145unsigned long sgivwfb_mem_size;
146EXPORT_SYMBOL(sgivwfb_mem_phys);
147EXPORT_SYMBOL(sgivwfb_mem_size);
148
149long long mem_size __initdata = 0;
150
151char * __init machine_specific_memory_setup(void)
152{
153 long long gfx_mem_size = 8 * MB;
154
155 mem_size = boot_params.alt_mem_k;
156
157 if (!mem_size) {
158 printk(KERN_WARNING "Bootloader didn't set memory size, upgrade it !\n");
159 mem_size = 128 * MB;
160 }
161
162 /*
163 * this hardcodes the graphics memory to 8 MB
164 * it really should be sized dynamically (or at least
165 * set as a boot param)
166 */
167 if (!sgivwfb_mem_size) {
168 printk(KERN_WARNING "Defaulting to 8 MB framebuffer size\n");
169 sgivwfb_mem_size = 8 * MB;
170 }
171
172 /*
173 * Trim to nearest MB
174 */
175 sgivwfb_mem_size &= ~((1 << 20) - 1);
176 sgivwfb_mem_phys = mem_size - gfx_mem_size;
177
178 add_memory_region(0, LOWMEMSIZE(), E820_RAM);
179 add_memory_region(HIGH_MEMORY, mem_size - sgivwfb_mem_size - HIGH_MEMORY, E820_RAM);
180 add_memory_region(sgivwfb_mem_phys, sgivwfb_mem_size, E820_RESERVED);
181
182 return "PROM";
183}
diff --git a/arch/x86/mach-visws/traps.c b/arch/x86/mach-visws/traps.c
deleted file mode 100644
index bfac6ba10f8a..000000000000
--- a/arch/x86/mach-visws/traps.c
+++ /dev/null
@@ -1,69 +0,0 @@
1/* VISWS traps */
2
3#include <linux/sched.h>
4#include <linux/kernel.h>
5#include <linux/init.h>
6#include <linux/pci.h>
7#include <linux/pci_ids.h>
8
9#include <asm/io.h>
10#include <asm/arch_hooks.h>
11#include <asm/apic.h>
12#include "cobalt.h"
13#include "lithium.h"
14
15
16#define A01234 (LI_INTA_0 | LI_INTA_1 | LI_INTA_2 | LI_INTA_3 | LI_INTA_4)
17#define BCD (LI_INTB | LI_INTC | LI_INTD)
18#define ALLDEVS (A01234 | BCD)
19
20static __init void lithium_init(void)
21{
22 set_fixmap(FIX_LI_PCIA, LI_PCI_A_PHYS);
23 set_fixmap(FIX_LI_PCIB, LI_PCI_B_PHYS);
24
25 if ((li_pcia_read16(PCI_VENDOR_ID) != PCI_VENDOR_ID_SGI) ||
26 (li_pcia_read16(PCI_DEVICE_ID) != PCI_DEVICE_ID_SGI_LITHIUM)) {
27 printk(KERN_EMERG "Lithium hostbridge %c not found\n", 'A');
28 panic("This machine is not SGI Visual Workstation 320/540");
29 }
30
31 if ((li_pcib_read16(PCI_VENDOR_ID) != PCI_VENDOR_ID_SGI) ||
32 (li_pcib_read16(PCI_DEVICE_ID) != PCI_DEVICE_ID_SGI_LITHIUM)) {
33 printk(KERN_EMERG "Lithium hostbridge %c not found\n", 'B');
34 panic("This machine is not SGI Visual Workstation 320/540");
35 }
36
37 li_pcia_write16(LI_PCI_INTEN, ALLDEVS);
38 li_pcib_write16(LI_PCI_INTEN, ALLDEVS);
39}
40
41static __init void cobalt_init(void)
42{
43 /*
44 * On normal SMP PC this is used only with SMP, but we have to
45 * use it and set it up here to start the Cobalt clock
46 */
47 set_fixmap(FIX_APIC_BASE, APIC_DEFAULT_PHYS_BASE);
48 setup_local_APIC();
49 printk(KERN_INFO "Local APIC Version %#x, ID %#x\n",
50 (unsigned int)apic_read(APIC_LVR),
51 (unsigned int)apic_read(APIC_ID));
52
53 set_fixmap(FIX_CO_CPU, CO_CPU_PHYS);
54 set_fixmap(FIX_CO_APIC, CO_APIC_PHYS);
55 printk(KERN_INFO "Cobalt Revision %#lx, APIC ID %#lx\n",
56 co_cpu_read(CO_CPU_REV), co_apic_read(CO_APIC_ID));
57
58 /* Enable Cobalt APIC being careful to NOT change the ID! */
59 co_apic_write(CO_APIC_ID, co_apic_read(CO_APIC_ID) | CO_APIC_ENABLE);
60
61 printk(KERN_INFO "Cobalt APIC enabled: ID reg %#lx\n",
62 co_apic_read(CO_APIC_ID));
63}
64
65void __init trap_init_hook(void)
66{
67 lithium_init();
68 cobalt_init();
69}
diff --git a/arch/x86/mach-visws/visws_apic.c b/arch/x86/mach-visws/visws_apic.c
deleted file mode 100644
index cef9cb1d15ac..000000000000
--- a/arch/x86/mach-visws/visws_apic.c
+++ /dev/null
@@ -1,297 +0,0 @@
1/*
2 * Copyright (C) 1999 Bent Hagemark, Ingo Molnar
3 *
4 * SGI Visual Workstation interrupt controller
5 *
6 * The Cobalt system ASIC in the Visual Workstation contains a "Cobalt" APIC
7 * which serves as the main interrupt controller in the system. Non-legacy
8 * hardware in the system uses this controller directly. Legacy devices
9 * are connected to the PIIX4 which in turn has its 8259(s) connected to
10 * a of the Cobalt APIC entry.
11 *
12 * 09/02/2000 - Updated for 2.4 by jbarnes@sgi.com
13 *
14 * 25/11/2002 - Updated for 2.5 by Andrey Panin <pazke@orbita1.ru>
15 */
16
17#include <linux/kernel_stat.h>
18#include <linux/interrupt.h>
19#include <linux/init.h>
20
21#include <asm/io.h>
22#include <asm/apic.h>
23#include <asm/i8259.h>
24
25#include "cobalt.h"
26#include "irq_vectors.h"
27
28
29static DEFINE_SPINLOCK(cobalt_lock);
30
31/*
32 * Set the given Cobalt APIC Redirection Table entry to point
33 * to the given IDT vector/index.
34 */
35static inline void co_apic_set(int entry, int irq)
36{
37 co_apic_write(CO_APIC_LO(entry), CO_APIC_LEVEL | (irq + FIRST_EXTERNAL_VECTOR));
38 co_apic_write(CO_APIC_HI(entry), 0);
39}
40
41/*
42 * Cobalt (IO)-APIC functions to handle PCI devices.
43 */
44static inline int co_apic_ide0_hack(void)
45{
46 extern char visws_board_type;
47 extern char visws_board_rev;
48
49 if (visws_board_type == VISWS_320 && visws_board_rev == 5)
50 return 5;
51 return CO_APIC_IDE0;
52}
53
54static int is_co_apic(unsigned int irq)
55{
56 if (IS_CO_APIC(irq))
57 return CO_APIC(irq);
58
59 switch (irq) {
60 case 0: return CO_APIC_CPU;
61 case CO_IRQ_IDE0: return co_apic_ide0_hack();
62 case CO_IRQ_IDE1: return CO_APIC_IDE1;
63 default: return -1;
64 }
65}
66
67
68/*
69 * This is the SGI Cobalt (IO-)APIC:
70 */
71
72static void enable_cobalt_irq(unsigned int irq)
73{
74 co_apic_set(is_co_apic(irq), irq);
75}
76
77static void disable_cobalt_irq(unsigned int irq)
78{
79 int entry = is_co_apic(irq);
80
81 co_apic_write(CO_APIC_LO(entry), CO_APIC_MASK);
82 co_apic_read(CO_APIC_LO(entry));
83}
84
85/*
86 * "irq" really just serves to identify the device. Here is where we
87 * map this to the Cobalt APIC entry where it's physically wired.
88 * This is called via request_irq -> setup_irq -> irq_desc->startup()
89 */
90static unsigned int startup_cobalt_irq(unsigned int irq)
91{
92 unsigned long flags;
93
94 spin_lock_irqsave(&cobalt_lock, flags);
95 if ((irq_desc[irq].status & (IRQ_DISABLED | IRQ_INPROGRESS | IRQ_WAITING)))
96 irq_desc[irq].status &= ~(IRQ_DISABLED | IRQ_INPROGRESS | IRQ_WAITING);
97 enable_cobalt_irq(irq);
98 spin_unlock_irqrestore(&cobalt_lock, flags);
99 return 0;
100}
101
102static void ack_cobalt_irq(unsigned int irq)
103{
104 unsigned long flags;
105
106 spin_lock_irqsave(&cobalt_lock, flags);
107 disable_cobalt_irq(irq);
108 apic_write(APIC_EOI, APIC_EIO_ACK);
109 spin_unlock_irqrestore(&cobalt_lock, flags);
110}
111
112static void end_cobalt_irq(unsigned int irq)
113{
114 unsigned long flags;
115
116 spin_lock_irqsave(&cobalt_lock, flags);
117 if (!(irq_desc[irq].status & (IRQ_DISABLED | IRQ_INPROGRESS)))
118 enable_cobalt_irq(irq);
119 spin_unlock_irqrestore(&cobalt_lock, flags);
120}
121
122static struct irq_chip cobalt_irq_type = {
123 .typename = "Cobalt-APIC",
124 .startup = startup_cobalt_irq,
125 .shutdown = disable_cobalt_irq,
126 .enable = enable_cobalt_irq,
127 .disable = disable_cobalt_irq,
128 .ack = ack_cobalt_irq,
129 .end = end_cobalt_irq,
130};
131
132
133/*
134 * This is the PIIX4-based 8259 that is wired up indirectly to Cobalt
135 * -- not the manner expected by the code in i8259.c.
136 *
137 * there is a 'master' physical interrupt source that gets sent to
138 * the CPU. But in the chipset there are various 'virtual' interrupts
139 * waiting to be handled. We represent this to Linux through a 'master'
140 * interrupt controller type, and through a special virtual interrupt-
141 * controller. Device drivers only see the virtual interrupt sources.
142 */
143static unsigned int startup_piix4_master_irq(unsigned int irq)
144{
145 init_8259A(0);
146
147 return startup_cobalt_irq(irq);
148}
149
150static void end_piix4_master_irq(unsigned int irq)
151{
152 unsigned long flags;
153
154 spin_lock_irqsave(&cobalt_lock, flags);
155 enable_cobalt_irq(irq);
156 spin_unlock_irqrestore(&cobalt_lock, flags);
157}
158
159static struct irq_chip piix4_master_irq_type = {
160 .typename = "PIIX4-master",
161 .startup = startup_piix4_master_irq,
162 .ack = ack_cobalt_irq,
163 .end = end_piix4_master_irq,
164};
165
166
167static struct irq_chip piix4_virtual_irq_type = {
168 .typename = "PIIX4-virtual",
169 .shutdown = disable_8259A_irq,
170 .enable = enable_8259A_irq,
171 .disable = disable_8259A_irq,
172};
173
174
175/*
176 * PIIX4-8259 master/virtual functions to handle interrupt requests
177 * from legacy devices: floppy, parallel, serial, rtc.
178 *
179 * None of these get Cobalt APIC entries, neither do they have IDT
180 * entries. These interrupts are purely virtual and distributed from
181 * the 'master' interrupt source: CO_IRQ_8259.
182 *
183 * When the 8259 interrupts its handler figures out which of these
184 * devices is interrupting and dispatches to its handler.
185 *
186 * CAREFUL: devices see the 'virtual' interrupt only. Thus disable/
187 * enable_irq gets the right irq. This 'master' irq is never directly
188 * manipulated by any driver.
189 */
190static irqreturn_t piix4_master_intr(int irq, void *dev_id)
191{
192 int realirq;
193 irq_desc_t *desc;
194 unsigned long flags;
195
196 spin_lock_irqsave(&i8259A_lock, flags);
197
198 /* Find out what's interrupting in the PIIX4 master 8259 */
199 outb(0x0c, 0x20); /* OCW3 Poll command */
200 realirq = inb(0x20);
201
202 /*
203 * Bit 7 == 0 means invalid/spurious
204 */
205 if (unlikely(!(realirq & 0x80)))
206 goto out_unlock;
207
208 realirq &= 7;
209
210 if (unlikely(realirq == 2)) {
211 outb(0x0c, 0xa0);
212 realirq = inb(0xa0);
213
214 if (unlikely(!(realirq & 0x80)))
215 goto out_unlock;
216
217 realirq = (realirq & 7) + 8;
218 }
219
220 /* mask and ack interrupt */
221 cached_irq_mask |= 1 << realirq;
222 if (unlikely(realirq > 7)) {
223 inb(0xa1);
224 outb(cached_slave_mask, 0xa1);
225 outb(0x60 + (realirq & 7), 0xa0);
226 outb(0x60 + 2, 0x20);
227 } else {
228 inb(0x21);
229 outb(cached_master_mask, 0x21);
230 outb(0x60 + realirq, 0x20);
231 }
232
233 spin_unlock_irqrestore(&i8259A_lock, flags);
234
235 desc = irq_desc + realirq;
236
237 /*
238 * handle this 'virtual interrupt' as a Cobalt one now.
239 */
240 kstat_cpu(smp_processor_id()).irqs[realirq]++;
241
242 if (likely(desc->action != NULL))
243 handle_IRQ_event(realirq, desc->action);
244
245 if (!(desc->status & IRQ_DISABLED))
246 enable_8259A_irq(realirq);
247
248 return IRQ_HANDLED;
249
250out_unlock:
251 spin_unlock_irqrestore(&i8259A_lock, flags);
252 return IRQ_NONE;
253}
254
255static struct irqaction master_action = {
256 .handler = piix4_master_intr,
257 .name = "PIIX4-8259",
258};
259
260static struct irqaction cascade_action = {
261 .handler = no_action,
262 .name = "cascade",
263};
264
265
266void init_VISWS_APIC_irqs(void)
267{
268 int i;
269
270 for (i = 0; i < CO_IRQ_APIC0 + CO_APIC_LAST + 1; i++) {
271 irq_desc[i].status = IRQ_DISABLED;
272 irq_desc[i].action = 0;
273 irq_desc[i].depth = 1;
274
275 if (i == 0) {
276 irq_desc[i].chip = &cobalt_irq_type;
277 }
278 else if (i == CO_IRQ_IDE0) {
279 irq_desc[i].chip = &cobalt_irq_type;
280 }
281 else if (i == CO_IRQ_IDE1) {
282 irq_desc[i].chip = &cobalt_irq_type;
283 }
284 else if (i == CO_IRQ_8259) {
285 irq_desc[i].chip = &piix4_master_irq_type;
286 }
287 else if (i < CO_IRQ_APIC0) {
288 irq_desc[i].chip = &piix4_virtual_irq_type;
289 }
290 else if (IS_CO_APIC(i)) {
291 irq_desc[i].chip = &cobalt_irq_type;
292 }
293 }
294
295 setup_irq(CO_IRQ_8259, &master_action);
296 setup_irq(2, &cascade_action);
297}
diff --git a/arch/x86/mach-voyager/setup.c b/arch/x86/mach-voyager/setup.c
index 5ae5466b9eb9..6bbdd633864c 100644
--- a/arch/x86/mach-voyager/setup.c
+++ b/arch/x86/mach-voyager/setup.c
@@ -62,6 +62,7 @@ void __init time_init_hook(void)
62char *__init machine_specific_memory_setup(void) 62char *__init machine_specific_memory_setup(void)
63{ 63{
64 char *who; 64 char *who;
65 int new_nr;
65 66
66 who = "NOT VOYAGER"; 67 who = "NOT VOYAGER";
67 68
@@ -73,7 +74,7 @@ char *__init machine_specific_memory_setup(void)
73 74
74 e820.nr_map = 0; 75 e820.nr_map = 0;
75 for (i = 0; voyager_memory_detect(i, &addr, &length); i++) { 76 for (i = 0; voyager_memory_detect(i, &addr, &length); i++) {
76 add_memory_region(addr, length, E820_RAM); 77 e820_add_region(addr, length, E820_RAM);
77 } 78 }
78 return who; 79 return who;
79 } else if (voyager_level == 4) { 80 } else if (voyager_level == 4) {
@@ -91,43 +92,17 @@ char *__init machine_specific_memory_setup(void)
91 tom = (boot_params.screen_info.ext_mem_k) << 10; 92 tom = (boot_params.screen_info.ext_mem_k) << 10;
92 } 93 }
93 who = "Voyager-TOM"; 94 who = "Voyager-TOM";
94 add_memory_region(0, 0x9f000, E820_RAM); 95 e820_add_region(0, 0x9f000, E820_RAM);
95 /* map from 1M to top of memory */ 96 /* map from 1M to top of memory */
96 add_memory_region(1 * 1024 * 1024, tom - 1 * 1024 * 1024, 97 e820_add_region(1 * 1024 * 1024, tom - 1 * 1024 * 1024,
97 E820_RAM); 98 E820_RAM);
98 /* FIXME: Should check the ASICs to see if I need to 99 /* FIXME: Should check the ASICs to see if I need to
99 * take out the 8M window. Just do it at the moment 100 * take out the 8M window. Just do it at the moment
100 * */ 101 * */
101 add_memory_region(8 * 1024 * 1024, 8 * 1024 * 1024, 102 e820_add_region(8 * 1024 * 1024, 8 * 1024 * 1024,
102 E820_RESERVED); 103 E820_RESERVED);
103 return who; 104 return who;
104 } 105 }
105 106
106 who = "BIOS-e820"; 107 return default_machine_specific_memory_setup();
107
108 /*
109 * Try to copy the BIOS-supplied E820-map.
110 *
111 * Otherwise fake a memory map; one section from 0k->640k,
112 * the next section from 1mb->appropriate_mem_k
113 */
114 sanitize_e820_map(boot_params.e820_map, &boot_params.e820_entries);
115 if (copy_e820_map(boot_params.e820_map, boot_params.e820_entries)
116 < 0) {
117 unsigned long mem_size;
118
119 /* compare results from other methods and take the greater */
120 if (boot_params.alt_mem_k < boot_params.screen_info.ext_mem_k) {
121 mem_size = boot_params.screen_info.ext_mem_k;
122 who = "BIOS-88";
123 } else {
124 mem_size = boot_params.alt_mem_k;
125 who = "BIOS-e801";
126 }
127
128 e820.nr_map = 0;
129 add_memory_region(0, LOWMEMSIZE(), E820_RAM);
130 add_memory_region(HIGH_MEMORY, mem_size << 10, E820_RAM);
131 }
132 return who;
133} 108}
diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c
index 8acbf0cdf1a5..8dedd01e909f 100644
--- a/arch/x86/mach-voyager/voyager_smp.c
+++ b/arch/x86/mach-voyager/voyager_smp.c
@@ -59,11 +59,6 @@ __u32 voyager_quad_processors = 0;
59 * activity count. Finally exported by i386_ksyms.c */ 59 * activity count. Finally exported by i386_ksyms.c */
60static int voyager_extended_cpus = 1; 60static int voyager_extended_cpus = 1;
61 61
62/* Have we found an SMP box - used by time.c to do the profiling
63 interrupt for timeslicing; do not set to 1 until the per CPU timer
64 interrupt is active */
65int smp_found_config = 0;
66
67/* Used for the invalidate map that's also checked in the spinlock */ 62/* Used for the invalidate map that's also checked in the spinlock */
68static volatile unsigned long smp_invalidate_needed; 63static volatile unsigned long smp_invalidate_needed;
69 64
@@ -1137,15 +1132,6 @@ void flush_tlb_all(void)
1137 on_each_cpu(do_flush_tlb_all, 0, 1, 1); 1132 on_each_cpu(do_flush_tlb_all, 0, 1, 1);
1138} 1133}
1139 1134
1140/* used to set up the trampoline for other CPUs when the memory manager
1141 * is sorted out */
1142void __init smp_alloc_memory(void)
1143{
1144 trampoline_base = alloc_bootmem_low_pages(PAGE_SIZE);
1145 if (__pa(trampoline_base) >= 0x93000)
1146 BUG();
1147}
1148
1149/* send a reschedule CPI to one CPU by physical CPU number*/ 1135/* send a reschedule CPI to one CPU by physical CPU number*/
1150static void voyager_smp_send_reschedule(int cpu) 1136static void voyager_smp_send_reschedule(int cpu)
1151{ 1137{
diff --git a/arch/x86/math-emu/reg_constant.c b/arch/x86/math-emu/reg_constant.c
index 04869e64b18e..00548354912f 100644
--- a/arch/x86/math-emu/reg_constant.c
+++ b/arch/x86/math-emu/reg_constant.c
@@ -16,8 +16,8 @@
16#include "reg_constant.h" 16#include "reg_constant.h"
17#include "control_w.h" 17#include "control_w.h"
18 18
19#define MAKE_REG(s,e,l,h) { l, h, \ 19#define MAKE_REG(s, e, l, h) { l, h, \
20 ((EXTENDED_Ebias+(e)) | ((SIGN_##s != 0)*0x8000)) } 20 ((EXTENDED_Ebias+(e)) | ((SIGN_##s != 0)*0x8000)) }
21 21
22FPU_REG const CONST_1 = MAKE_REG(POS, 0, 0x00000000, 0x80000000); 22FPU_REG const CONST_1 = MAKE_REG(POS, 0, 0x00000000, 0x80000000);
23#if 0 23#if 0
@@ -40,7 +40,7 @@ FPU_REG const CONST_PI2extra = MAKE_REG(NEG, -66,
40FPU_REG const CONST_Z = MAKE_REG(POS, EXP_UNDER, 0x0, 0x0); 40FPU_REG const CONST_Z = MAKE_REG(POS, EXP_UNDER, 0x0, 0x0);
41 41
42/* Only the sign and significand (and tag) are used in internal NaNs */ 42/* Only the sign and significand (and tag) are used in internal NaNs */
43/* The 80486 never generates one of these 43/* The 80486 never generates one of these
44FPU_REG const CONST_SNAN = MAKE_REG(POS, EXP_OVER, 0x00000001, 0x80000000); 44FPU_REG const CONST_SNAN = MAKE_REG(POS, EXP_OVER, 0x00000001, 0x80000000);
45 */ 45 */
46/* This is the real indefinite QNaN */ 46/* This is the real indefinite QNaN */
@@ -49,7 +49,7 @@ FPU_REG const CONST_QNaN = MAKE_REG(NEG, EXP_OVER, 0x00000000, 0xC0000000);
49/* Only the sign (and tag) is used in internal infinities */ 49/* Only the sign (and tag) is used in internal infinities */
50FPU_REG const CONST_INF = MAKE_REG(POS, EXP_OVER, 0x00000000, 0x80000000); 50FPU_REG const CONST_INF = MAKE_REG(POS, EXP_OVER, 0x00000000, 0x80000000);
51 51
52static void fld_const(FPU_REG const *c, int adj, u_char tag) 52static void fld_const(FPU_REG const * c, int adj, u_char tag)
53{ 53{
54 FPU_REG *st_new_ptr; 54 FPU_REG *st_new_ptr;
55 55
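
What MAKE_REG packs, for reference: an 80x87 extended-precision value as a 64-bit significand with an explicit integer bit, plus a 16-bit field carrying the biased exponent (bias 0x3fff) and the sign in bit 15. A stand-alone sketch of CONST_1:

    #include <stdint.h>

    struct fpu_reg_sketch {        /* same shape as FPU_REG's fields */
        uint32_t sigl, sigh;       /* significand; explicit integer
                                      bit is bit 31 of sigh */
        uint16_t exp;              /* biased exponent | sign << 15 */
    };

    /* CONST_1 == +1.0: unbiased exponent 0, significand 1.0 */
    static const struct fpu_reg_sketch one = {
        0x00000000, 0x80000000, 0x3fff      /* EXTENDED_Ebias + 0 */
    };
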
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index b7b3e4c7cfc9..c107641cd39b 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -13,5 +13,6 @@ obj-$(CONFIG_NUMA) += discontig_32.o
13else 13else
14obj-$(CONFIG_NUMA) += numa_64.o 14obj-$(CONFIG_NUMA) += numa_64.o
15obj-$(CONFIG_K8_NUMA) += k8topology_64.o 15obj-$(CONFIG_K8_NUMA) += k8topology_64.o
16obj-$(CONFIG_ACPI_NUMA) += srat_64.o
17endif 16endif
17obj-$(CONFIG_ACPI_NUMA) += srat_$(BITS).o
18
diff --git a/arch/x86/mm/discontig_32.c b/arch/x86/mm/discontig_32.c
index 914ccf983687..5dfef9fa061a 100644
--- a/arch/x86/mm/discontig_32.c
+++ b/arch/x86/mm/discontig_32.c
@@ -38,6 +38,7 @@
38#include <asm/setup.h> 38#include <asm/setup.h>
39#include <asm/mmzone.h> 39#include <asm/mmzone.h>
40#include <asm/bios_ebda.h> 40#include <asm/bios_ebda.h>
41#include <asm/proto.h>
41 42
42struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; 43struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
43EXPORT_SYMBOL(node_data); 44EXPORT_SYMBOL(node_data);
@@ -59,14 +60,14 @@ unsigned long node_end_pfn[MAX_NUMNODES] __read_mostly;
59/* 60/*
60 * 4) physnode_map - the mapping between a pfn and owning node 61 * 4) physnode_map - the mapping between a pfn and owning node
61 * physnode_map keeps track of the physical memory layout of a generic 62 * physnode_map keeps track of the physical memory layout of a generic
62 * numa node on a 256Mb break (each element of the array will 63 * numa node on a 64Mb break (each element of the array will
63 * represent 256Mb of memory and will be marked by the node id. so, 64 * represent 64Mb of memory and will be marked by the node id. so,
64 * if the first gig is on node 0, and the second gig is on node 1 65 * if the first gig is on node 0, and the second gig is on node 1
65 * physnode_map will contain: 66 * physnode_map will contain:
66 * 67 *
67 * physnode_map[0-3] = 0; 68 * physnode_map[0-15] = 0;
68 * physnode_map[4-7] = 1; 69 * physnode_map[16-31] = 1;
69 * physnode_map[8- ] = -1; 70 * physnode_map[32- ] = -1;
70 */ 71 */
71s8 physnode_map[MAX_ELEMENTS] __read_mostly = { [0 ... (MAX_ELEMENTS - 1)] = -1}; 72s8 physnode_map[MAX_ELEMENTS] __read_mostly = { [0 ... (MAX_ELEMENTS - 1)] = -1};
72EXPORT_SYMBOL(physnode_map); 73EXPORT_SYMBOL(physnode_map);
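
The lookup this layout implies (a sketch of the pfn-to-node mapping; with 64 MB elements and 4 KB pages, PAGES_PER_ELEMENT is 16384):

    static inline int pfn_to_nid_sketch(unsigned long pfn)
    {
        return physnode_map[pfn / PAGES_PER_ELEMENT];   /* -1: no node */
    }
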
@@ -75,15 +76,15 @@ void memory_present(int nid, unsigned long start, unsigned long end)
75{ 76{
76 unsigned long pfn; 77 unsigned long pfn;
77 78
78 printk(KERN_INFO "Node: %d, start_pfn: %ld, end_pfn: %ld\n", 79 printk(KERN_INFO "Node: %d, start_pfn: %lx, end_pfn: %lx\n",
79 nid, start, end); 80 nid, start, end);
80 printk(KERN_DEBUG " Setting physnode_map array to node %d for pfns:\n", nid); 81 printk(KERN_DEBUG " Setting physnode_map array to node %d for pfns:\n", nid);
81 printk(KERN_DEBUG " "); 82 printk(KERN_DEBUG " ");
82 for (pfn = start; pfn < end; pfn += PAGES_PER_ELEMENT) { 83 for (pfn = start; pfn < end; pfn += PAGES_PER_ELEMENT) {
83 physnode_map[pfn / PAGES_PER_ELEMENT] = nid; 84 physnode_map[pfn / PAGES_PER_ELEMENT] = nid;
84 printk("%ld ", pfn); 85 printk(KERN_CONT "%lx ", pfn);
85 } 86 }
86 printk("\n"); 87 printk(KERN_CONT "\n");
87} 88}
88 89
89unsigned long node_memmap_size_bytes(int nid, unsigned long start_pfn, 90unsigned long node_memmap_size_bytes(int nid, unsigned long start_pfn,
@@ -99,7 +100,6 @@ unsigned long node_memmap_size_bytes(int nid, unsigned long start_pfn,
99#endif 100#endif
100 101
101extern unsigned long find_max_low_pfn(void); 102extern unsigned long find_max_low_pfn(void);
102extern void add_one_highpage_init(struct page *, int, int);
103extern unsigned long highend_pfn, highstart_pfn; 103extern unsigned long highend_pfn, highstart_pfn;
104 104
105#define LARGE_PAGE_BYTES (PTRS_PER_PTE * PAGE_SIZE) 105#define LARGE_PAGE_BYTES (PTRS_PER_PTE * PAGE_SIZE)
@@ -117,13 +117,13 @@ static unsigned long kva_pages;
117 */ 117 */
118int __init get_memcfg_numa_flat(void) 118int __init get_memcfg_numa_flat(void)
119{ 119{
120 printk("NUMA - single node, flat memory mode\n"); 120 printk(KERN_DEBUG "NUMA - single node, flat memory mode\n");
121 121
122 /* Run the memory configuration and find the top of memory. */
123 propagate_e820_map();
124 node_start_pfn[0] = 0; 122 node_start_pfn[0] = 0;
125 node_end_pfn[0] = max_pfn; 123 node_end_pfn[0] = max_pfn;
124 e820_register_active_regions(0, 0, max_pfn);
126 memory_present(0, 0, max_pfn); 125 memory_present(0, 0, max_pfn);
126 node_remap_size[0] = node_memmap_size_bytes(0, 0, max_pfn);
127 127
128 /* Indicate there is one node available. */ 128 /* Indicate there is one node available. */
129 nodes_clear(node_online_map); 129 nodes_clear(node_online_map);
@@ -156,24 +156,32 @@ static void __init propagate_e820_map_node(int nid)
156 */ 156 */
157static void __init allocate_pgdat(int nid) 157static void __init allocate_pgdat(int nid)
158{ 158{
159 if (nid && node_has_online_mem(nid)) 159 char buf[16];
160
161 if (node_has_online_mem(nid) && node_remap_start_vaddr[nid])
160 NODE_DATA(nid) = (pg_data_t *)node_remap_start_vaddr[nid]; 162 NODE_DATA(nid) = (pg_data_t *)node_remap_start_vaddr[nid];
161 else { 163 else {
162 NODE_DATA(nid) = (pg_data_t *)(pfn_to_kaddr(min_low_pfn)); 164 unsigned long pgdat_phys;
163 min_low_pfn += PFN_UP(sizeof(pg_data_t)); 165 pgdat_phys = find_e820_area(min_low_pfn<<PAGE_SHIFT,
166 max_pfn_mapped<<PAGE_SHIFT,
167 sizeof(pg_data_t),
168 PAGE_SIZE);
169 NODE_DATA(nid) = (pg_data_t *)(pfn_to_kaddr(pgdat_phys>>PAGE_SHIFT));
170 memset(buf, 0, sizeof(buf));
171 sprintf(buf, "NODE_DATA %d", nid);
172 reserve_early(pgdat_phys, pgdat_phys + sizeof(pg_data_t), buf);
164 } 173 }
174 printk(KERN_DEBUG "allocate_pgdat: node %d NODE_DATA %08lx\n",
175 nid, (unsigned long)NODE_DATA(nid));
165} 176}
166 177
167#ifdef CONFIG_DISCONTIGMEM
168/* 178/*
169 * In the discontig memory model, a portion of the kernel virtual area (KVA) 179 * In the DISCONTIGMEM and SPARSEMEM memory model, a portion of the kernel
170 * is reserved and portions of nodes are mapped using it. This is to allow 180 * virtual address space (KVA) is reserved and portions of nodes are mapped
171 * node-local memory to be allocated for structures that would normally require 181 * using it. This is to allow node-local memory to be allocated for
172 * ZONE_NORMAL. The memory is allocated with alloc_remap() and callers 182 * structures that would normally require ZONE_NORMAL. The memory is
173 * should be prepared to allocate from the bootmem allocator instead. This KVA 183 * allocated with alloc_remap() and callers should be prepared to allocate
174 * mechanism is incompatible with SPARSEMEM as it makes assumptions about the 184 * from the bootmem allocator instead.
175 * layout of memory that are broken if alloc_remap() succeeds for some of the
176 * map and fails for others
177 */ 185 */
178static unsigned long node_remap_start_pfn[MAX_NUMNODES]; 186static unsigned long node_remap_start_pfn[MAX_NUMNODES];
179static void *node_remap_end_vaddr[MAX_NUMNODES]; 187static void *node_remap_end_vaddr[MAX_NUMNODES];
@@ -195,15 +203,19 @@ void *alloc_remap(int nid, unsigned long size)
195 return allocation; 203 return allocation;
196} 204}
197 205
198void __init remap_numa_kva(void) 206static void __init remap_numa_kva(void)
199{ 207{
200 void *vaddr; 208 void *vaddr;
201 unsigned long pfn; 209 unsigned long pfn;
202 int node; 210 int node;
203 211
204 for_each_online_node(node) { 212 for_each_online_node(node) {
213 printk(KERN_DEBUG "remap_numa_kva: node %d\n", node);
205 for (pfn=0; pfn < node_remap_size[node]; pfn += PTRS_PER_PTE) { 214 for (pfn=0; pfn < node_remap_size[node]; pfn += PTRS_PER_PTE) {
206 vaddr = node_remap_start_vaddr[node]+(pfn<<PAGE_SHIFT); 215 vaddr = node_remap_start_vaddr[node]+(pfn<<PAGE_SHIFT);
216 printk(KERN_DEBUG "remap_numa_kva: %08lx to pfn %08lx\n",
217 (unsigned long)vaddr,
218 node_remap_start_pfn[node] + pfn);
207 set_pmd_pfn((ulong) vaddr, 219 set_pmd_pfn((ulong) vaddr,
208 node_remap_start_pfn[node] + pfn, 220 node_remap_start_pfn[node] + pfn,
209 PAGE_KERNEL_LARGE); 221 PAGE_KERNEL_LARGE);
@@ -215,17 +227,21 @@ static unsigned long calculate_numa_remap_pages(void)
215{ 227{
216 int nid; 228 int nid;
217 unsigned long size, reserve_pages = 0; 229 unsigned long size, reserve_pages = 0;
218 unsigned long pfn;
219 230
220 for_each_online_node(nid) { 231 for_each_online_node(nid) {
221 unsigned old_end_pfn = node_end_pfn[nid]; 232 u64 node_kva_target;
233 u64 node_kva_final;
222 234
223 /* 235 /*
 224 * The acpi/srat node info can show hot-add memory zones 236 * The acpi/srat node info can show hot-add memory zones
225 * where memory could be added but not currently present. 237 * where memory could be added but not currently present.
226 */ 238 */
239 printk(KERN_DEBUG "node %d pfn: [%lx - %lx]\n",
240 nid, node_start_pfn[nid], node_end_pfn[nid]);
227 if (node_start_pfn[nid] > max_pfn) 241 if (node_start_pfn[nid] > max_pfn)
228 continue; 242 continue;
243 if (!node_end_pfn[nid])
244 continue;
229 if (node_end_pfn[nid] > max_pfn) 245 if (node_end_pfn[nid] > max_pfn)
230 node_end_pfn[nid] = max_pfn; 246 node_end_pfn[nid] = max_pfn;
231 247
@@ -237,41 +253,48 @@ static unsigned long calculate_numa_remap_pages(void)
237 /* now the roundup is correct, convert to PAGE_SIZE pages */ 253 /* now the roundup is correct, convert to PAGE_SIZE pages */
238 size = size * PTRS_PER_PTE; 254 size = size * PTRS_PER_PTE;
239 255
240 /* 256 node_kva_target = round_down(node_end_pfn[nid] - size,
241 * Validate the region we are allocating only contains valid 257 PTRS_PER_PTE);
242 * pages. 258 node_kva_target <<= PAGE_SHIFT;
243 */ 259 do {
244 for (pfn = node_end_pfn[nid] - size; 260 node_kva_final = find_e820_area(node_kva_target,
245 pfn < node_end_pfn[nid]; pfn++) 261 ((u64)node_end_pfn[nid])<<PAGE_SHIFT,
246 if (!page_is_ram(pfn)) 262 ((u64)size)<<PAGE_SHIFT,
247 break; 263 LARGE_PAGE_BYTES);
248 264 node_kva_target -= LARGE_PAGE_BYTES;
249 if (pfn != node_end_pfn[nid]) 265 } while (node_kva_final == -1ULL &&
250 size = 0; 266 (node_kva_target>>PAGE_SHIFT) > (node_start_pfn[nid]));
267
268 if (node_kva_final == -1ULL)
269 panic("Can not get kva ram\n");
251 270
252 printk("Reserving %ld pages of KVA for lmem_map of node %d\n",
253 size, nid);
254 node_remap_size[nid] = size; 271 node_remap_size[nid] = size;
255 node_remap_offset[nid] = reserve_pages; 272 node_remap_offset[nid] = reserve_pages;
256 reserve_pages += size; 273 reserve_pages += size;
257 printk("Shrinking node %d from %ld pages to %ld pages\n", 274 printk(KERN_DEBUG "Reserving %ld pages of KVA for lmem_map of"
258 nid, node_end_pfn[nid], node_end_pfn[nid] - size); 275 " node %d at %llx\n",
259 276 size, nid, node_kva_final>>PAGE_SHIFT);
260 if (node_end_pfn[nid] & (PTRS_PER_PTE-1)) { 277
261 /* 278 /*
 262 * Align node_end_pfn[] and node_remap_start_pfn[] to 279 * Prevent the KVA address from dropping below max_low_pfn; we
 263 * pmd boundary. remap_numa_kva will barf otherwise. 280 * want it usable on systems with less memory later.
 264 */ 281 * The layout will be: KVA pages, then KVA RAM.
 265 printk("Shrinking node %d further by %ld pages for proper alignment\n", 282 *
 266 nid, node_end_pfn[nid] & (PTRS_PER_PTE-1)); 283 * We are supposed to record only ranges below max_low_pfn, but
 267 size += node_end_pfn[nid] & (PTRS_PER_PTE-1); 284 * high memory may contain holes, and the allocator only checks
 268 } 285 * page_is_ram(pfn) && !page_is_reserved_early(pfn) to decide
 286 * whether a page is free.
 287 * So reserve_early() the range here; hope the early_res array has room.
 288 */
289 reserve_early(node_kva_final,
290 node_kva_final+(((u64)size)<<PAGE_SHIFT),
291 "KVA RAM");
269 292
270 node_end_pfn[nid] -= size; 293 node_remap_start_pfn[nid] = node_kva_final>>PAGE_SHIFT;
271 node_remap_start_pfn[nid] = node_end_pfn[nid]; 294 remove_active_range(nid, node_remap_start_pfn[nid],
272 shrink_active_range(nid, old_end_pfn, node_end_pfn[nid]); 295 node_remap_start_pfn[nid] + size);
273 } 296 }
274 printk("Reserving total of %ld pages for numa KVA remap\n", 297 printk(KERN_INFO "Reserving total of %lx pages for numa KVA remap\n",
275 reserve_pages); 298 reserve_pages);
276 return reserve_pages; 299 return reserve_pages;
277} 300}
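
The replacement above drops the page_is_ram() scan in favour of a descending probe of the e820 map: start at the highest large-page-aligned candidate under the node's end and step down one large page per retry until find_e820_area() succeeds or the node's start is reached. A standalone sketch of that search pattern follows; find_free_range() and its toy free window are stand-ins invented here, not kernel code, and only the -1ULL failure convention is taken from the diff.

#include <stdint.h>
#include <stdio.h>

#define LARGE_PAGE_BYTES (512ULL * 4096ULL)     /* one 2 MiB PAE large page */

/* Toy probe standing in for find_e820_area(): pretend the only free
 * window is [16 MiB, 24 MiB); returns -1ULL on failure. */
static uint64_t find_free_range(uint64_t start, uint64_t end,
                                uint64_t size, uint64_t align)
{
        uint64_t cand = (start + align - 1) & ~(align - 1);

        if (cand < 0x1000000ULL)                /* below the free window */
                return (uint64_t)-1;
        if (cand + size > 0x1800000ULL || cand + size > end)
                return (uint64_t)-1;
        return cand;
}

/* Walk downward from the highest aligned candidate until a free,
 * aligned block of `size` bytes is found inside the node, or give up. */
static uint64_t find_kva_ram(uint64_t node_start, uint64_t node_end,
                             uint64_t size)
{
        uint64_t target = (node_end - size) & ~(LARGE_PAGE_BYTES - 1);
        uint64_t found;

        do {
                found = find_free_range(target, node_end, size,
                                        LARGE_PAGE_BYTES);
                target -= LARGE_PAGE_BYTES;
        } while (found == (uint64_t)-1 && target > node_start);

        return found;                           /* -1ULL if nothing fits */
}

int main(void)
{
        /* node spans [0, 64 MiB); ask for one 2 MiB block */
        printf("%llx\n", (unsigned long long)
               find_kva_ram(0, 64ULL << 20, LARGE_PAGE_BYTES));
        return 0;
}
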
@@ -285,37 +308,16 @@ static void init_remap_allocator(int nid)
285 node_remap_alloc_vaddr[nid] = node_remap_start_vaddr[nid] + 308 node_remap_alloc_vaddr[nid] = node_remap_start_vaddr[nid] +
286 ALIGN(sizeof(pg_data_t), PAGE_SIZE); 309 ALIGN(sizeof(pg_data_t), PAGE_SIZE);
287 310
288 printk ("node %d will remap to vaddr %08lx - %08lx\n", nid, 311 printk(KERN_DEBUG "node %d will remap to vaddr %08lx - %08lx\n", nid,
289 (ulong) node_remap_start_vaddr[nid], 312 (ulong) node_remap_start_vaddr[nid],
290 (ulong) pfn_to_kaddr(highstart_pfn 313 (ulong) node_remap_end_vaddr[nid]);
291 + node_remap_offset[nid] + node_remap_size[nid]));
292}
293#else
294void *alloc_remap(int nid, unsigned long size)
295{
296 return NULL;
297}
298
299static unsigned long calculate_numa_remap_pages(void)
300{
301 return 0;
302}
303
304static void init_remap_allocator(int nid)
305{
306}
307
308void __init remap_numa_kva(void)
309{
310} 314}
311#endif /* CONFIG_DISCONTIGMEM */
312 315
313extern void setup_bootmem_allocator(void); 316void __init initmem_init(unsigned long start_pfn,
314unsigned long __init setup_memory(void) 317 unsigned long end_pfn)
315{ 318{
316 int nid; 319 int nid;
317 unsigned long system_start_pfn, system_max_low_pfn; 320 long kva_target_pfn;
318 unsigned long wasted_pages;
319 321
320 /* 322 /*
321 * When mapping a NUMA machine we allocate the node_mem_map arrays 323 * When mapping a NUMA machine we allocate the node_mem_map arrays
@@ -324,109 +326,77 @@ unsigned long __init setup_memory(void)
324 * this space and use it to adjust the boundary between ZONE_NORMAL 326 * this space and use it to adjust the boundary between ZONE_NORMAL
325 * and ZONE_HIGHMEM. 327 * and ZONE_HIGHMEM.
326 */ 328 */
327 get_memcfg_numa();
328 329
329 kva_pages = calculate_numa_remap_pages(); 330 get_memcfg_numa();
330 331
331 /* partially used pages are not usable - thus round upwards */ 332 kva_pages = round_up(calculate_numa_remap_pages(), PTRS_PER_PTE);
332 system_start_pfn = min_low_pfn = PFN_UP(init_pg_tables_end);
333 333
334 kva_start_pfn = find_max_low_pfn() - kva_pages; 334 kva_target_pfn = round_down(max_low_pfn - kva_pages, PTRS_PER_PTE);
335 do {
336 kva_start_pfn = find_e820_area(kva_target_pfn<<PAGE_SHIFT,
337 max_low_pfn<<PAGE_SHIFT,
338 kva_pages<<PAGE_SHIFT,
339 PTRS_PER_PTE<<PAGE_SHIFT) >> PAGE_SHIFT;
340 kva_target_pfn -= PTRS_PER_PTE;
341 } while (kva_start_pfn == -1UL && kva_target_pfn > min_low_pfn);
335 342
336#ifdef CONFIG_BLK_DEV_INITRD 343 if (kva_start_pfn == -1UL)
 337 /* Numa kva area is below the initrd */ 344 panic("Cannot get kva space\n");
338 if (initrd_start)
339 kva_start_pfn = PFN_DOWN(initrd_start - PAGE_OFFSET)
340 - kva_pages;
341#endif
342 345
343 /* 346 printk(KERN_INFO "kva_start_pfn ~ %lx max_low_pfn ~ %lx\n",
 344 * We waste pages at the end of the KVA for no good reason other
345 * than how it is located. This is bad.
346 */
347 wasted_pages = kva_start_pfn & (PTRS_PER_PTE-1);
348 kva_start_pfn -= wasted_pages;
349 kva_pages += wasted_pages;
350
351 system_max_low_pfn = max_low_pfn = find_max_low_pfn();
352 printk("kva_start_pfn ~ %ld find_max_low_pfn() ~ %ld\n",
353 kva_start_pfn, max_low_pfn); 347 kva_start_pfn, max_low_pfn);
354 printk("max_pfn = %ld\n", max_pfn); 348 printk(KERN_INFO "max_pfn = %lx\n", max_pfn);
349
350 /* avoid clash with initrd */
351 reserve_early(kva_start_pfn<<PAGE_SHIFT,
352 (kva_start_pfn + kva_pages)<<PAGE_SHIFT,
353 "KVA PG");
355#ifdef CONFIG_HIGHMEM 354#ifdef CONFIG_HIGHMEM
356 highstart_pfn = highend_pfn = max_pfn; 355 highstart_pfn = highend_pfn = max_pfn;
357 if (max_pfn > system_max_low_pfn) 356 if (max_pfn > max_low_pfn)
358 highstart_pfn = system_max_low_pfn; 357 highstart_pfn = max_low_pfn;
359 printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", 358 printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
360 pages_to_mb(highend_pfn - highstart_pfn)); 359 pages_to_mb(highend_pfn - highstart_pfn));
361 num_physpages = highend_pfn; 360 num_physpages = highend_pfn;
362 high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1; 361 high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
363#else 362#else
364 num_physpages = system_max_low_pfn; 363 num_physpages = max_low_pfn;
365 high_memory = (void *) __va(system_max_low_pfn * PAGE_SIZE - 1) + 1; 364 high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
366#endif 365#endif
367 printk(KERN_NOTICE "%ldMB LOWMEM available.\n", 366 printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
368 pages_to_mb(system_max_low_pfn)); 367 pages_to_mb(max_low_pfn));
369 printk("min_low_pfn = %ld, max_low_pfn = %ld, highstart_pfn = %ld\n", 368 printk(KERN_DEBUG "max_low_pfn = %lx, highstart_pfn = %lx\n",
370 min_low_pfn, max_low_pfn, highstart_pfn); 369 max_low_pfn, highstart_pfn);
371 370
372 printk("Low memory ends at vaddr %08lx\n", 371 printk(KERN_DEBUG "Low memory ends at vaddr %08lx\n",
373 (ulong) pfn_to_kaddr(max_low_pfn)); 372 (ulong) pfn_to_kaddr(max_low_pfn));
374 for_each_online_node(nid) { 373 for_each_online_node(nid) {
375 init_remap_allocator(nid); 374 init_remap_allocator(nid);
376 375
377 allocate_pgdat(nid); 376 allocate_pgdat(nid);
378 } 377 }
379 printk("High memory starts at vaddr %08lx\n", 378 remap_numa_kva();
379
380 printk(KERN_DEBUG "High memory starts at vaddr %08lx\n",
380 (ulong) pfn_to_kaddr(highstart_pfn)); 381 (ulong) pfn_to_kaddr(highstart_pfn));
381 for_each_online_node(nid) 382 for_each_online_node(nid)
382 propagate_e820_map_node(nid); 383 propagate_e820_map_node(nid);
383 384
384 memset(NODE_DATA(0), 0, sizeof(struct pglist_data)); 385 for_each_online_node(nid)
386 memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
387
385 NODE_DATA(0)->bdata = &node0_bdata; 388 NODE_DATA(0)->bdata = &node0_bdata;
386 setup_bootmem_allocator(); 389 setup_bootmem_allocator();
387 return max_low_pfn;
388}
389
390void __init numa_kva_reserve(void)
391{
392 if (kva_pages)
393 reserve_bootmem(PFN_PHYS(kva_start_pfn), PFN_PHYS(kva_pages),
394 BOOTMEM_DEFAULT);
395} 390}
396 391
397void __init zone_sizes_init(void) 392void __init set_highmem_pages_init(void)
398{
399 int nid;
400 unsigned long max_zone_pfns[MAX_NR_ZONES];
401 memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
402 max_zone_pfns[ZONE_DMA] =
403 virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
404 max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
405#ifdef CONFIG_HIGHMEM
406 max_zone_pfns[ZONE_HIGHMEM] = highend_pfn;
407#endif
408
409 /* If SRAT has not registered memory, register it now */
410 if (find_max_pfn_with_active_regions() == 0) {
411 for_each_online_node(nid) {
412 if (node_has_online_mem(nid))
413 add_active_range(nid, node_start_pfn[nid],
414 node_end_pfn[nid]);
415 }
416 }
417
418 free_area_init_nodes(max_zone_pfns);
419 return;
420}
421
422void __init set_highmem_pages_init(int bad_ppro)
423{ 393{
424#ifdef CONFIG_HIGHMEM 394#ifdef CONFIG_HIGHMEM
425 struct zone *zone; 395 struct zone *zone;
426 struct page *page; 396 int nid;
427 397
428 for_each_zone(zone) { 398 for_each_zone(zone) {
429 unsigned long node_pfn, zone_start_pfn, zone_end_pfn; 399 unsigned long zone_start_pfn, zone_end_pfn;
430 400
431 if (!is_highmem(zone)) 401 if (!is_highmem(zone))
432 continue; 402 continue;
@@ -434,16 +404,12 @@ void __init set_highmem_pages_init(int bad_ppro)
434 zone_start_pfn = zone->zone_start_pfn; 404 zone_start_pfn = zone->zone_start_pfn;
435 zone_end_pfn = zone_start_pfn + zone->spanned_pages; 405 zone_end_pfn = zone_start_pfn + zone->spanned_pages;
436 406
437 printk("Initializing %s for node %d (%08lx:%08lx)\n", 407 nid = zone_to_nid(zone);
438 zone->name, zone_to_nid(zone), 408 printk(KERN_INFO "Initializing %s for node %d (%08lx:%08lx)\n",
439 zone_start_pfn, zone_end_pfn); 409 zone->name, nid, zone_start_pfn, zone_end_pfn);
440 410
441 for (node_pfn = zone_start_pfn; node_pfn < zone_end_pfn; node_pfn++) { 411 add_highpages_with_active_regions(nid, zone_start_pfn,
442 if (!pfn_valid(node_pfn)) 412 zone_end_pfn);
443 continue;
444 page = pfn_to_page(node_pfn);
445 add_one_highpage_init(page, node_pfn, bad_ppro);
446 }
447 } 413 }
448 totalram_pages += totalhigh_pages; 414 totalram_pages += totalhigh_pages;
449#endif 415#endif
@@ -476,3 +442,4 @@ int memory_add_physaddr_to_nid(u64 addr)
476 442
477EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); 443EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
478#endif 444#endif
445
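
Throughout this file, allocate_pgdat() and calculate_numa_remap_pages() now pair find_e820_area() with reserve_early() instead of bumping min_low_pfn. A minimal model of that find-then-reserve idiom, with a toy reservation table standing in for the kernel's early_res[] (the sizes and names here are illustrative only):

#include <stdint.h>
#include <stdio.h>

struct early_res { uint64_t start, end; char name[16]; };
static struct early_res early_res[32];
static int nr_early_res;

/* A range is free when it overlaps no logged reservation. */
static int range_is_free(uint64_t start, uint64_t end)
{
        for (int i = 0; i < nr_early_res; i++)
                if (start < early_res[i].end && end > early_res[i].start)
                        return 0;
        return 1;
}

/* Find + reserve in one step: every successful allocation is logged so
 * later passes treat the range as occupied. */
static uint64_t early_alloc(uint64_t start, uint64_t end, uint64_t size,
                            const char *name)
{
        for (uint64_t cand = start; cand + size <= end; cand += size)
                if (range_is_free(cand, cand + size)) {
                        struct early_res *r = &early_res[nr_early_res++];
                        r->start = cand;
                        r->end = cand + size;
                        snprintf(r->name, sizeof(r->name), "%s", name);
                        return cand;
                }
        return (uint64_t)-1;
}

int main(void)
{
        uint64_t a = early_alloc(0x1000, 0x100000, 0x1000, "NODE_DATA 0");
        uint64_t b = early_alloc(0x1000, 0x100000, 0x1000, "NODE_DATA 1");

        printf("%llx %llx\n", (unsigned long long)a, (unsigned long long)b);
        return 0;
}
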
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index 2c24bea92c66..0bb0caed8971 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -42,7 +42,7 @@ static struct addr_marker address_markers[] = {
42 { 0, "User Space" }, 42 { 0, "User Space" },
43#ifdef CONFIG_X86_64 43#ifdef CONFIG_X86_64
44 { 0x8000000000000000UL, "Kernel Space" }, 44 { 0x8000000000000000UL, "Kernel Space" },
45 { 0xffff810000000000UL, "Low Kernel Mapping" }, 45 { PAGE_OFFSET, "Low Kernel Mapping" },
46 { VMALLOC_START, "vmalloc() Area" }, 46 { VMALLOC_START, "vmalloc() Area" },
47 { VMEMMAP_START, "Vmemmap" }, 47 { VMEMMAP_START, "Vmemmap" },
48 { __START_KERNEL_map, "High Kernel Mapping" }, 48 { __START_KERNEL_map, "High Kernel Mapping" },
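
The one-line fix above swaps a pasted literal for PAGE_OFFSET so the marker table follows the layout macros. A small userspace sketch of the same idea; the DEMO_* constants are illustrative values, not the kernel's:

#include <stdio.h>

#define DEMO_PAGE_OFFSET   0xffff810000000000UL  /* illustrative only */
#define DEMO_VMALLOC_START 0xffffc20000000000UL  /* illustrative only */

struct addr_marker {
        unsigned long start_address;
        const char *name;
};

/* Keying entries on the layout macros (rather than a pasted literal)
 * keeps the table correct if the memory map ever moves. */
static const struct addr_marker markers[] = {
        { 0,                  "User Space" },
        { DEMO_PAGE_OFFSET,   "Low Kernel Mapping" },
        { DEMO_VMALLOC_START, "vmalloc() Area" },
};

int main(void)
{
        for (unsigned i = 0; i < sizeof(markers) / sizeof(markers[0]); i++)
                printf("%016lx %s\n", markers[i].start_address,
                       markers[i].name);
        return 0;
}
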
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 8bcb6f40ccb6..d0f5fce77d95 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -55,11 +55,7 @@ static inline int notify_page_fault(struct pt_regs *regs)
55 int ret = 0; 55 int ret = 0;
56 56
57 /* kprobe_running() needs smp_processor_id() */ 57 /* kprobe_running() needs smp_processor_id() */
58#ifdef CONFIG_X86_32
59 if (!user_mode_vm(regs)) { 58 if (!user_mode_vm(regs)) {
60#else
61 if (!user_mode(regs)) {
62#endif
63 preempt_disable(); 59 preempt_disable();
64 if (kprobe_running() && kprobe_fault_handler(regs, 14)) 60 if (kprobe_running() && kprobe_fault_handler(regs, 14))
65 ret = 1; 61 ret = 1;
@@ -396,11 +392,7 @@ static void show_fault_oops(struct pt_regs *regs, unsigned long error_code,
396 printk(KERN_CONT "NULL pointer dereference"); 392 printk(KERN_CONT "NULL pointer dereference");
397 else 393 else
398 printk(KERN_CONT "paging request"); 394 printk(KERN_CONT "paging request");
399#ifdef CONFIG_X86_32 395 printk(KERN_CONT " at %p\n", (void *) address);
400 printk(KERN_CONT " at %08lx\n", address);
401#else
402 printk(KERN_CONT " at %016lx\n", address);
403#endif
404 printk(KERN_ALERT "IP:"); 396 printk(KERN_ALERT "IP:");
405 printk_address(regs->ip, 1); 397 printk_address(regs->ip, 1);
406 dump_pagetable(address); 398 dump_pagetable(address);
@@ -800,14 +792,10 @@ bad_area_nosemaphore:
800 if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) && 792 if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
801 printk_ratelimit()) { 793 printk_ratelimit()) {
802 printk( 794 printk(
803#ifdef CONFIG_X86_32 795 "%s%s[%d]: segfault at %lx ip %p sp %p error %lx",
804 "%s%s[%d]: segfault at %lx ip %08lx sp %08lx error %lx",
805#else
806 "%s%s[%d]: segfault at %lx ip %lx sp %lx error %lx",
807#endif
808 task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG, 796 task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
809 tsk->comm, task_pid_nr(tsk), address, regs->ip, 797 tsk->comm, task_pid_nr(tsk), address,
810 regs->sp, error_code); 798 (void *) regs->ip, (void *) regs->sp, error_code);
811 print_vma_addr(" in ", regs->ip); 799 print_vma_addr(" in ", regs->ip);
812 printk("\n"); 800 printk("\n");
813 } 801 }
@@ -915,14 +903,7 @@ LIST_HEAD(pgd_list);
915void vmalloc_sync_all(void) 903void vmalloc_sync_all(void)
916{ 904{
917#ifdef CONFIG_X86_32 905#ifdef CONFIG_X86_32
918 /* 906 unsigned long start = VMALLOC_START & PGDIR_MASK;
919 * Note that races in the updates of insync and start aren't
920 * problematic: insync can only get set bits added, and updates to
921 * start are only improving performance (without affecting correctness
922 * if undone).
923 */
924 static DECLARE_BITMAP(insync, PTRS_PER_PGD);
925 static unsigned long start = TASK_SIZE;
926 unsigned long address; 907 unsigned long address;
927 908
928 if (SHARED_KERNEL_PMD) 909 if (SHARED_KERNEL_PMD)
@@ -930,56 +911,38 @@ void vmalloc_sync_all(void)
930 911
931 BUILD_BUG_ON(TASK_SIZE & ~PGDIR_MASK); 912 BUILD_BUG_ON(TASK_SIZE & ~PGDIR_MASK);
932 for (address = start; address >= TASK_SIZE; address += PGDIR_SIZE) { 913 for (address = start; address >= TASK_SIZE; address += PGDIR_SIZE) {
933 if (!test_bit(pgd_index(address), insync)) { 914 unsigned long flags;
934 unsigned long flags; 915 struct page *page;
935 struct page *page; 916
936 917 spin_lock_irqsave(&pgd_lock, flags);
937 spin_lock_irqsave(&pgd_lock, flags); 918 list_for_each_entry(page, &pgd_list, lru) {
938 list_for_each_entry(page, &pgd_list, lru) { 919 if (!vmalloc_sync_one(page_address(page),
939 if (!vmalloc_sync_one(page_address(page), 920 address))
940 address)) 921 break;
941 break;
942 }
943 spin_unlock_irqrestore(&pgd_lock, flags);
944 if (!page)
945 set_bit(pgd_index(address), insync);
946 } 922 }
947 if (address == start && test_bit(pgd_index(address), insync)) 923 spin_unlock_irqrestore(&pgd_lock, flags);
948 start = address + PGDIR_SIZE;
949 } 924 }
950#else /* CONFIG_X86_64 */ 925#else /* CONFIG_X86_64 */
951 /* 926 unsigned long start = VMALLOC_START & PGDIR_MASK;
952 * Note that races in the updates of insync and start aren't
953 * problematic: insync can only get set bits added, and updates to
954 * start are only improving performance (without affecting correctness
955 * if undone).
956 */
957 static DECLARE_BITMAP(insync, PTRS_PER_PGD);
958 static unsigned long start = VMALLOC_START & PGDIR_MASK;
959 unsigned long address; 927 unsigned long address;
960 928
961 for (address = start; address <= VMALLOC_END; address += PGDIR_SIZE) { 929 for (address = start; address <= VMALLOC_END; address += PGDIR_SIZE) {
962 if (!test_bit(pgd_index(address), insync)) { 930 const pgd_t *pgd_ref = pgd_offset_k(address);
963 const pgd_t *pgd_ref = pgd_offset_k(address); 931 unsigned long flags;
964 unsigned long flags; 932 struct page *page;
965 struct page *page; 933
966 934 if (pgd_none(*pgd_ref))
967 if (pgd_none(*pgd_ref)) 935 continue;
968 continue; 936 spin_lock_irqsave(&pgd_lock, flags);
969 spin_lock_irqsave(&pgd_lock, flags); 937 list_for_each_entry(page, &pgd_list, lru) {
970 list_for_each_entry(page, &pgd_list, lru) { 938 pgd_t *pgd;
971 pgd_t *pgd; 939 pgd = (pgd_t *)page_address(page) + pgd_index(address);
972 pgd = (pgd_t *)page_address(page) + pgd_index(address); 940 if (pgd_none(*pgd))
973 if (pgd_none(*pgd)) 941 set_pgd(pgd, *pgd_ref);
974 set_pgd(pgd, *pgd_ref); 942 else
975 else 943 BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
976 BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
977 }
978 spin_unlock_irqrestore(&pgd_lock, flags);
979 set_bit(pgd_index(address), insync);
980 } 944 }
981 if (address == start) 945 spin_unlock_irqrestore(&pgd_lock, flags);
982 start = address + PGDIR_SIZE;
983 } 946 }
984#endif 947#endif
985} 948}
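
The fault.c hunks above delete three CONFIG_X86_32 #ifdef blocks by printing addresses through %p with a (void *) cast, which sizes the output to the pointer width of whichever build is running. A userspace analogue, assuming only standard printf semantics:

#include <stdio.h>

int main(void)
{
        unsigned long address = 0xdeadbeefUL;

        /* %p sizes its output to the platform pointer width, so one
         * format string serves both 32- and 64-bit builds -- the same
         * trick the hunks above use to drop the per-arch #ifdefs. */
        printf("fault at %p\n", (void *)address);
        return 0;
}
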
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index ec30d10154b6..029e8cffca9e 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -50,6 +50,7 @@
50 50
51unsigned int __VMALLOC_RESERVE = 128 << 20; 51unsigned int __VMALLOC_RESERVE = 128 << 20;
52 52
53unsigned long max_low_pfn_mapped;
53unsigned long max_pfn_mapped; 54unsigned long max_pfn_mapped;
54 55
55DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); 56DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
@@ -57,6 +58,27 @@ unsigned long highstart_pfn, highend_pfn;
57 58
58static noinline int do_test_wp_bit(void); 59static noinline int do_test_wp_bit(void);
59 60
61
62static unsigned long __initdata table_start;
63static unsigned long __meminitdata table_end;
64static unsigned long __meminitdata table_top;
65
66static int __initdata after_init_bootmem;
67
68static __init void *alloc_low_page(unsigned long *phys)
69{
70 unsigned long pfn = table_end++;
71 void *adr;
72
73 if (pfn >= table_top)
74 panic("alloc_low_page: ran out of memory");
75
76 adr = __va(pfn * PAGE_SIZE);
77 memset(adr, 0, PAGE_SIZE);
78 *phys = pfn * PAGE_SIZE;
79 return adr;
80}
81
60/* 82/*
61 * Creates a middle page table and puts a pointer to it in the 83 * Creates a middle page table and puts a pointer to it in the
62 * given global directory entry. This only returns the gd entry 84 * given global directory entry. This only returns the gd entry
@@ -68,9 +90,12 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd)
68 pmd_t *pmd_table; 90 pmd_t *pmd_table;
69 91
70#ifdef CONFIG_X86_PAE 92#ifdef CONFIG_X86_PAE
93 unsigned long phys;
71 if (!(pgd_val(*pgd) & _PAGE_PRESENT)) { 94 if (!(pgd_val(*pgd) & _PAGE_PRESENT)) {
72 pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE); 95 if (after_init_bootmem)
73 96 pmd_table = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE);
97 else
98 pmd_table = (pmd_t *)alloc_low_page(&phys);
74 paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT); 99 paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT);
75 set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT)); 100 set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
76 pud = pud_offset(pgd, 0); 101 pud = pud_offset(pgd, 0);
@@ -92,12 +117,16 @@ static pte_t * __init one_page_table_init(pmd_t *pmd)
92 if (!(pmd_val(*pmd) & _PAGE_PRESENT)) { 117 if (!(pmd_val(*pmd) & _PAGE_PRESENT)) {
93 pte_t *page_table = NULL; 118 pte_t *page_table = NULL;
94 119
120 if (after_init_bootmem) {
95#ifdef CONFIG_DEBUG_PAGEALLOC 121#ifdef CONFIG_DEBUG_PAGEALLOC
96 page_table = (pte_t *) alloc_bootmem_pages(PAGE_SIZE); 122 page_table = (pte_t *) alloc_bootmem_pages(PAGE_SIZE);
97#endif 123#endif
98 if (!page_table) { 124 if (!page_table)
99 page_table = 125 page_table =
100 (pte_t *)alloc_bootmem_low_pages(PAGE_SIZE); 126 (pte_t *)alloc_bootmem_low_pages(PAGE_SIZE);
127 } else {
128 unsigned long phys;
129 page_table = (pte_t *)alloc_low_page(&phys);
101 } 130 }
102 131
103 paravirt_alloc_pte(&init_mm, __pa(page_table) >> PAGE_SHIFT); 132 paravirt_alloc_pte(&init_mm, __pa(page_table) >> PAGE_SHIFT);
@@ -155,38 +184,44 @@ static inline int is_kernel_text(unsigned long addr)
155 * of max_low_pfn pages, by creating page tables starting from address 184 * of max_low_pfn pages, by creating page tables starting from address
156 * PAGE_OFFSET: 185 * PAGE_OFFSET:
157 */ 186 */
158static void __init kernel_physical_mapping_init(pgd_t *pgd_base) 187static void __init kernel_physical_mapping_init(pgd_t *pgd_base,
188 unsigned long start_pfn,
189 unsigned long end_pfn,
190 int use_pse)
159{ 191{
160 int pgd_idx, pmd_idx, pte_ofs; 192 int pgd_idx, pmd_idx, pte_ofs;
161 unsigned long pfn; 193 unsigned long pfn;
162 pgd_t *pgd; 194 pgd_t *pgd;
163 pmd_t *pmd; 195 pmd_t *pmd;
164 pte_t *pte; 196 pte_t *pte;
197 unsigned pages_2m = 0, pages_4k = 0;
165 198
166 pgd_idx = pgd_index(PAGE_OFFSET); 199 if (!cpu_has_pse)
167 pgd = pgd_base + pgd_idx; 200 use_pse = 0;
168 pfn = 0;
169 201
202 pfn = start_pfn;
203 pgd_idx = pgd_index((pfn<<PAGE_SHIFT) + PAGE_OFFSET);
204 pgd = pgd_base + pgd_idx;
170 for (; pgd_idx < PTRS_PER_PGD; pgd++, pgd_idx++) { 205 for (; pgd_idx < PTRS_PER_PGD; pgd++, pgd_idx++) {
171 pmd = one_md_table_init(pgd); 206 pmd = one_md_table_init(pgd);
172 if (pfn >= max_low_pfn)
173 continue;
174 207
175 for (pmd_idx = 0; 208 if (pfn >= end_pfn)
176 pmd_idx < PTRS_PER_PMD && pfn < max_low_pfn; 209 continue;
210#ifdef CONFIG_X86_PAE
211 pmd_idx = pmd_index((pfn<<PAGE_SHIFT) + PAGE_OFFSET);
212 pmd += pmd_idx;
213#else
214 pmd_idx = 0;
215#endif
216 for (; pmd_idx < PTRS_PER_PMD && pfn < end_pfn;
177 pmd++, pmd_idx++) { 217 pmd++, pmd_idx++) {
178 unsigned int addr = pfn * PAGE_SIZE + PAGE_OFFSET; 218 unsigned int addr = pfn * PAGE_SIZE + PAGE_OFFSET;
179 219
180 /* 220 /*
181 * Map with big pages if possible, otherwise 221 * Map with big pages if possible, otherwise
182 * create normal page tables: 222 * create normal page tables:
183 *
184 * Don't use a large page for the first 2/4MB of memory
185 * because there are often fixed size MTRRs in there
186 * and overlapping MTRRs into large pages can cause
187 * slowdowns.
188 */ 223 */
189 if (cpu_has_pse && !(pgd_idx == 0 && pmd_idx == 0)) { 224 if (use_pse) {
190 unsigned int addr2; 225 unsigned int addr2;
191 pgprot_t prot = PAGE_KERNEL_LARGE; 226 pgprot_t prot = PAGE_KERNEL_LARGE;
192 227
@@ -197,34 +232,30 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
197 is_kernel_text(addr2)) 232 is_kernel_text(addr2))
198 prot = PAGE_KERNEL_LARGE_EXEC; 233 prot = PAGE_KERNEL_LARGE_EXEC;
199 234
235 pages_2m++;
200 set_pmd(pmd, pfn_pmd(pfn, prot)); 236 set_pmd(pmd, pfn_pmd(pfn, prot));
201 237
202 pfn += PTRS_PER_PTE; 238 pfn += PTRS_PER_PTE;
203 max_pfn_mapped = pfn;
204 continue; 239 continue;
205 } 240 }
206 pte = one_page_table_init(pmd); 241 pte = one_page_table_init(pmd);
207 242
208 for (pte_ofs = 0; 243 pte_ofs = pte_index((pfn<<PAGE_SHIFT) + PAGE_OFFSET);
209 pte_ofs < PTRS_PER_PTE && pfn < max_low_pfn; 244 pte += pte_ofs;
245 for (; pte_ofs < PTRS_PER_PTE && pfn < end_pfn;
210 pte++, pfn++, pte_ofs++, addr += PAGE_SIZE) { 246 pte++, pfn++, pte_ofs++, addr += PAGE_SIZE) {
211 pgprot_t prot = PAGE_KERNEL; 247 pgprot_t prot = PAGE_KERNEL;
212 248
213 if (is_kernel_text(addr)) 249 if (is_kernel_text(addr))
214 prot = PAGE_KERNEL_EXEC; 250 prot = PAGE_KERNEL_EXEC;
215 251
252 pages_4k++;
216 set_pte(pte, pfn_pte(pfn, prot)); 253 set_pte(pte, pfn_pte(pfn, prot));
217 } 254 }
218 max_pfn_mapped = pfn;
219 } 255 }
220 } 256 }
221} 257 update_page_count(PG_LEVEL_2M, pages_2m);
222 258 update_page_count(PG_LEVEL_4K, pages_4k);
223static inline int page_kills_ppro(unsigned long pagenr)
224{
225 if (pagenr >= 0x70000 && pagenr <= 0x7003F)
226 return 1;
227 return 0;
228} 259}
229 260
230/* 261/*
@@ -287,29 +318,62 @@ static void __init permanent_kmaps_init(pgd_t *pgd_base)
287 pkmap_page_table = pte; 318 pkmap_page_table = pte;
288} 319}
289 320
290void __init add_one_highpage_init(struct page *page, int pfn, int bad_ppro) 321static void __init add_one_highpage_init(struct page *page, int pfn)
291{ 322{
292 if (page_is_ram(pfn) && !(bad_ppro && page_kills_ppro(pfn))) { 323 ClearPageReserved(page);
293 ClearPageReserved(page); 324 init_page_count(page);
294 init_page_count(page); 325 __free_page(page);
295 __free_page(page); 326 totalhigh_pages++;
296 totalhigh_pages++;
297 } else
298 SetPageReserved(page);
299} 327}
300 328
301#ifndef CONFIG_NUMA 329struct add_highpages_data {
302static void __init set_highmem_pages_init(int bad_ppro) 330 unsigned long start_pfn;
331 unsigned long end_pfn;
332};
333
334static int __init add_highpages_work_fn(unsigned long start_pfn,
335 unsigned long end_pfn, void *datax)
303{ 336{
304 int pfn; 337 int node_pfn;
338 struct page *page;
339 unsigned long final_start_pfn, final_end_pfn;
340 struct add_highpages_data *data;
305 341
306 for (pfn = highstart_pfn; pfn < highend_pfn; pfn++) { 342 data = (struct add_highpages_data *)datax;
307 /* 343
 308 * Holes under sparsemem might not have mem_map[]: 344 final_start_pfn = max(start_pfn, data->start_pfn);
309 */ 345 final_end_pfn = min(end_pfn, data->end_pfn);
310 if (pfn_valid(pfn)) 346 if (final_start_pfn >= final_end_pfn)
311 add_one_highpage_init(pfn_to_page(pfn), pfn, bad_ppro); 347 return 0;
348
349 for (node_pfn = final_start_pfn; node_pfn < final_end_pfn;
350 node_pfn++) {
351 if (!pfn_valid(node_pfn))
352 continue;
353 page = pfn_to_page(node_pfn);
354 add_one_highpage_init(page, node_pfn);
312 } 355 }
356
357 return 0;
358
359}
360
361void __init add_highpages_with_active_regions(int nid, unsigned long start_pfn,
362 unsigned long end_pfn)
363{
364 struct add_highpages_data data;
365
366 data.start_pfn = start_pfn;
367 data.end_pfn = end_pfn;
368
369 work_with_active_regions(nid, add_highpages_work_fn, &data);
370}
371
372#ifndef CONFIG_NUMA
373static void __init set_highmem_pages_init(void)
374{
375 add_highpages_with_active_regions(0, highstart_pfn, highend_pfn);
376
313 totalram_pages += totalhigh_pages; 377 totalram_pages += totalhigh_pages;
314} 378}
315#endif /* !CONFIG_NUMA */ 379#endif /* !CONFIG_NUMA */
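
add_highpages_work_fn() above clamps each active region against the caller's window before freeing pages into highmem. The max()/min() clamp with an early exit is the reusable part; a self-contained sketch (the function and names are invented here):

#include <stdio.h>

/* Intersect an active region [start, end) with a requested window;
 * the max()/min() clamp plus early exit mirrors add_highpages_work_fn(). */
static unsigned long clamp_region(unsigned long start, unsigned long end,
                                  unsigned long win_start,
                                  unsigned long win_end,
                                  unsigned long *out_start,
                                  unsigned long *out_end)
{
        unsigned long s = start > win_start ? start : win_start; /* max() */
        unsigned long e = end < win_end ? end : win_end;         /* min() */

        if (s >= e)
                return 0;               /* no overlap: nothing to free */
        *out_start = s;
        *out_end = e;
        return e - s;
}

int main(void)
{
        unsigned long s, e;

        if (clamp_region(0x100, 0x400, 0x200, 0x800, &s, &e))
                printf("overlap: [%lx, %lx)\n", s, e);   /* [200, 400) */
        return 0;
}
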
@@ -317,14 +381,9 @@ static void __init set_highmem_pages_init(int bad_ppro)
317#else 381#else
318# define kmap_init() do { } while (0) 382# define kmap_init() do { } while (0)
319# define permanent_kmaps_init(pgd_base) do { } while (0) 383# define permanent_kmaps_init(pgd_base) do { } while (0)
320# define set_highmem_pages_init(bad_ppro) do { } while (0) 384# define set_highmem_pages_init() do { } while (0)
321#endif /* CONFIG_HIGHMEM */ 385#endif /* CONFIG_HIGHMEM */
322 386
323pteval_t __PAGE_KERNEL = _PAGE_KERNEL;
324EXPORT_SYMBOL(__PAGE_KERNEL);
325
326pteval_t __PAGE_KERNEL_EXEC = _PAGE_KERNEL_EXEC;
327
328void __init native_pagetable_setup_start(pgd_t *base) 387void __init native_pagetable_setup_start(pgd_t *base)
329{ 388{
330 unsigned long pfn, va; 389 unsigned long pfn, va;
@@ -380,27 +439,10 @@ void __init native_pagetable_setup_done(pgd_t *base)
380 * be partially populated, and so it avoids stomping on any existing 439 * be partially populated, and so it avoids stomping on any existing
381 * mappings. 440 * mappings.
382 */ 441 */
383static void __init pagetable_init(void) 442static void __init early_ioremap_page_table_range_init(pgd_t *pgd_base)
384{ 443{
385 pgd_t *pgd_base = swapper_pg_dir;
386 unsigned long vaddr, end; 444 unsigned long vaddr, end;
387 445
388 paravirt_pagetable_setup_start(pgd_base);
389
390 /* Enable PSE if available */
391 if (cpu_has_pse)
392 set_in_cr4(X86_CR4_PSE);
393
394 /* Enable PGE if available */
395 if (cpu_has_pge) {
396 set_in_cr4(X86_CR4_PGE);
397 __PAGE_KERNEL |= _PAGE_GLOBAL;
398 __PAGE_KERNEL_EXEC |= _PAGE_GLOBAL;
399 }
400
401 kernel_physical_mapping_init(pgd_base);
402 remap_numa_kva();
403
404 /* 446 /*
405 * Fixed mappings, only the page table structure has to be 447 * Fixed mappings, only the page table structure has to be
406 * created - mappings will be set by set_fixmap(): 448 * created - mappings will be set by set_fixmap():
@@ -410,6 +452,13 @@ static void __init pagetable_init(void)
410 end = (FIXADDR_TOP + PMD_SIZE - 1) & PMD_MASK; 452 end = (FIXADDR_TOP + PMD_SIZE - 1) & PMD_MASK;
411 page_table_range_init(vaddr, end, pgd_base); 453 page_table_range_init(vaddr, end, pgd_base);
412 early_ioremap_reset(); 454 early_ioremap_reset();
455}
456
457static void __init pagetable_init(void)
458{
459 pgd_t *pgd_base = swapper_pg_dir;
460
461 paravirt_pagetable_setup_start(pgd_base);
413 462
414 permanent_kmaps_init(pgd_base); 463 permanent_kmaps_init(pgd_base);
415 464
@@ -456,7 +505,7 @@ void zap_low_mappings(void)
456 505
457int nx_enabled; 506int nx_enabled;
458 507
459pteval_t __supported_pte_mask __read_mostly = ~_PAGE_NX; 508pteval_t __supported_pte_mask __read_mostly = ~(_PAGE_NX | _PAGE_GLOBAL);
460EXPORT_SYMBOL_GPL(__supported_pte_mask); 509EXPORT_SYMBOL_GPL(__supported_pte_mask);
461 510
462#ifdef CONFIG_X86_PAE 511#ifdef CONFIG_X86_PAE
@@ -509,27 +558,318 @@ static void __init set_nx(void)
509} 558}
510#endif 559#endif
511 560
561/* user-defined highmem size */
562static unsigned int highmem_pages = -1;
563
512/* 564/*
513 * paging_init() sets up the page tables - note that the first 8MB are 565 * highmem=size forces highmem to be exactly 'size' bytes.
514 * already mapped by head.S. 566 * This works even on boxes that have no highmem otherwise.
515 * 567 * This also works to reduce highmem size on bigger boxes.
516 * This routines also unmaps the page at virtual kernel address 0, so
517 * that we can trap those pesky NULL-reference errors in the kernel.
518 */ 568 */
519void __init paging_init(void) 569static int __init parse_highmem(char *arg)
570{
571 if (!arg)
572 return -EINVAL;
573
574 highmem_pages = memparse(arg, &arg) >> PAGE_SHIFT;
575 return 0;
576}
577early_param("highmem", parse_highmem);
578
579/*
580 * Determine low and high memory ranges:
581 */
582void __init find_low_pfn_range(void)
520{ 583{
 584 /* this may update max_pfn */
 585
 586 /* max_low_pfn starts out 0; early_res support is already in place */
587
588 max_low_pfn = max_pfn;
589 if (max_low_pfn > MAXMEM_PFN) {
590 if (highmem_pages == -1)
591 highmem_pages = max_pfn - MAXMEM_PFN;
592 if (highmem_pages + MAXMEM_PFN < max_pfn)
593 max_pfn = MAXMEM_PFN + highmem_pages;
594 if (highmem_pages + MAXMEM_PFN > max_pfn) {
 595 printk(KERN_WARNING "only %luMB of highmem "
 596 "available, ignoring highmem size of %uMB.\n",
597 pages_to_mb(max_pfn - MAXMEM_PFN),
598 pages_to_mb(highmem_pages));
599 highmem_pages = 0;
600 }
601 max_low_pfn = MAXMEM_PFN;
602#ifndef CONFIG_HIGHMEM
603 /* Maximum memory usable is what is directly addressable */
 604 printk(KERN_WARNING "Warning: only %ldMB will be used.\n",
605 MAXMEM>>20);
606 if (max_pfn > MAX_NONPAE_PFN)
607 printk(KERN_WARNING
608 "Use a HIGHMEM64G enabled kernel.\n");
609 else
610 printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n");
611 max_pfn = MAXMEM_PFN;
612#else /* !CONFIG_HIGHMEM */
613#ifndef CONFIG_HIGHMEM64G
614 if (max_pfn > MAX_NONPAE_PFN) {
615 max_pfn = MAX_NONPAE_PFN;
 616 printk(KERN_WARNING "Warning: only 4GB will be used. "
617 "Use a HIGHMEM64G enabled kernel.\n");
618 }
619#endif /* !CONFIG_HIGHMEM64G */
620#endif /* !CONFIG_HIGHMEM */
621 } else {
622 if (highmem_pages == -1)
623 highmem_pages = 0;
624#ifdef CONFIG_HIGHMEM
625 if (highmem_pages >= max_pfn) {
626 printk(KERN_ERR "highmem size specified (%uMB) is "
627 "bigger than pages available (%luMB)!.\n",
628 pages_to_mb(highmem_pages),
629 pages_to_mb(max_pfn));
630 highmem_pages = 0;
631 }
632 if (highmem_pages) {
633 if (max_low_pfn - highmem_pages <
634 64*1024*1024/PAGE_SIZE){
635 printk(KERN_ERR "highmem size %uMB results in "
636 "smaller than 64MB lowmem, ignoring it.\n"
637 , pages_to_mb(highmem_pages));
638 highmem_pages = 0;
639 }
640 max_low_pfn -= highmem_pages;
641 }
642#else
643 if (highmem_pages)
644 printk(KERN_ERR "ignoring highmem size on non-highmem"
645 " kernel!\n");
646#endif
647 }
648}
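
parse_highmem() above converts a boot-line size string to pages with memparse(), and find_low_pfn_range() then budgets lowmem around it. A standalone memparse-style helper showing that conversion, assuming 4k pages; parse_size() is a simplified stand-in, not the kernel routine:

#include <stdio.h>
#include <stdlib.h>

#define DEMO_PAGE_SHIFT 12              /* 4k pages assumed */

/* Minimal memparse()-alike: a number with an optional K/M/G suffix. */
static unsigned long long parse_size(const char *s)
{
        char *end;
        unsigned long long v = strtoull(s, &end, 0);

        switch (*end) {
        case 'G': case 'g': v <<= 10;   /* fall through */
        case 'M': case 'm': v <<= 10;   /* fall through */
        case 'K': case 'k': v <<= 10;   break;
        }
        return v;
}

int main(void)
{
        /* "highmem=512M" -> 0x20000 pages, as parse_highmem() computes */
        printf("%llx\n", parse_size("512M") >> DEMO_PAGE_SHIFT);
        return 0;
}
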
649
650#ifndef CONFIG_NEED_MULTIPLE_NODES
651void __init initmem_init(unsigned long start_pfn,
652 unsigned long end_pfn)
653{
654#ifdef CONFIG_HIGHMEM
655 highstart_pfn = highend_pfn = max_pfn;
656 if (max_pfn > max_low_pfn)
657 highstart_pfn = max_low_pfn;
658 memory_present(0, 0, highend_pfn);
659 e820_register_active_regions(0, 0, highend_pfn);
660 printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
661 pages_to_mb(highend_pfn - highstart_pfn));
662 num_physpages = highend_pfn;
663 high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
664#else
665 memory_present(0, 0, max_low_pfn);
666 e820_register_active_regions(0, 0, max_low_pfn);
667 num_physpages = max_low_pfn;
668 high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
669#endif
670#ifdef CONFIG_FLATMEM
671 max_mapnr = num_physpages;
672#endif
673 printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
674 pages_to_mb(max_low_pfn));
675
676 setup_bootmem_allocator();
677}
678#endif /* !CONFIG_NEED_MULTIPLE_NODES */
679
680static void __init zone_sizes_init(void)
681{
682 unsigned long max_zone_pfns[MAX_NR_ZONES];
683 memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
684 max_zone_pfns[ZONE_DMA] =
685 virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
686 max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
687#ifdef CONFIG_HIGHMEM
688 max_zone_pfns[ZONE_HIGHMEM] = highend_pfn;
689#endif
690
691 free_area_init_nodes(max_zone_pfns);
692}
693
694void __init setup_bootmem_allocator(void)
695{
696 int i;
697 unsigned long bootmap_size, bootmap;
698 /*
699 * Initialize the boot-time allocator (with low memory only):
700 */
701 bootmap_size = bootmem_bootmap_pages(max_low_pfn)<<PAGE_SHIFT;
702 bootmap = find_e820_area(min_low_pfn<<PAGE_SHIFT,
703 max_pfn_mapped<<PAGE_SHIFT, bootmap_size,
704 PAGE_SIZE);
705 if (bootmap == -1L)
706 panic("Cannot find bootmem map of size %ld\n", bootmap_size);
707 reserve_early(bootmap, bootmap + bootmap_size, "BOOTMAP");
708
709 /* don't touch min_low_pfn */
710 bootmap_size = init_bootmem_node(NODE_DATA(0), bootmap >> PAGE_SHIFT,
711 min_low_pfn, max_low_pfn);
712 printk(KERN_INFO " mapped low ram: 0 - %08lx\n",
713 max_pfn_mapped<<PAGE_SHIFT);
714 printk(KERN_INFO " low ram: %08lx - %08lx\n",
715 min_low_pfn<<PAGE_SHIFT, max_low_pfn<<PAGE_SHIFT);
716 printk(KERN_INFO " bootmap %08lx - %08lx\n",
717 bootmap, bootmap + bootmap_size);
718 for_each_online_node(i)
719 free_bootmem_with_active_regions(i, max_low_pfn);
720 early_res_to_bootmem(0, max_low_pfn<<PAGE_SHIFT);
721
722 after_init_bootmem = 1;
723}
724
725static void __init find_early_table_space(unsigned long end)
726{
727 unsigned long puds, pmds, ptes, tables, start;
728
729 puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
730 tables = PAGE_ALIGN(puds * sizeof(pud_t));
731
732 pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
733 tables += PAGE_ALIGN(pmds * sizeof(pmd_t));
734
735 if (cpu_has_pse) {
736 unsigned long extra;
737
738 extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT);
739 extra += PMD_SIZE;
740 ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT;
741 } else
742 ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT;
743
744 tables += PAGE_ALIGN(ptes * sizeof(pte_t));
745
746 /* for fixmap */
747 tables += PAGE_SIZE * 2;
748
749 /*
750 * RED-PEN putting page tables only on node 0 could
751 * cause a hotspot and fill up ZONE_DMA. The page tables
752 * need roughly 0.5KB per GB.
753 */
754 start = 0x7000;
755 table_start = find_e820_area(start, max_pfn_mapped<<PAGE_SHIFT,
756 tables, PAGE_SIZE);
757 if (table_start == -1UL)
758 panic("Cannot find space for the kernel page tables");
759
760 table_start >>= PAGE_SHIFT;
761 table_end = table_start;
762 table_top = table_start + (tables>>PAGE_SHIFT);
763
764 printk(KERN_DEBUG "kernel direct mapping tables up to %lx @ %lx-%lx\n",
765 end, table_start << PAGE_SHIFT,
766 (table_start << PAGE_SHIFT) + tables);
767}
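
find_early_table_space() above sizes the early page-table pool by counting PUD, PMD and PTE slots needed to map [0, end); with PSE only the tail that a large page cannot cover needs PTE pages. A worked version of the same arithmetic under assumed 32-bit PAE shift values (the DEMO_* macros are illustrative):

#include <stdio.h>

#define DEMO_PAGE_SHIFT 12
#define DEMO_PMD_SHIFT  21              /* PAE: 2 MiB large pages */
#define DEMO_PUD_SHIFT  30
#define DEMO_PAGE_SIZE  (1UL << DEMO_PAGE_SHIFT)
#define DEMO_PMD_SIZE   (1UL << DEMO_PMD_SHIFT)
#define DEMO_PAGE_ALIGN(x) (((x) + DEMO_PAGE_SIZE - 1) & ~(DEMO_PAGE_SIZE - 1))

/* Bytes of page-table storage needed to map [0, end): full PUD and PMD
 * coverage, but with PSE only the unaligned tail needs PTE pages --
 * the same arithmetic as find_early_table_space(). */
static unsigned long table_space(unsigned long end, int use_pse)
{
        unsigned long puds = (end + (1UL << DEMO_PUD_SHIFT) - 1) >> DEMO_PUD_SHIFT;
        unsigned long pmds = (end + DEMO_PMD_SIZE - 1) >> DEMO_PMD_SHIFT;
        unsigned long ptes, tables;

        tables  = DEMO_PAGE_ALIGN(puds * 8);    /* 8 == sizeof(pud_t) w/ PAE */
        tables += DEMO_PAGE_ALIGN(pmds * 8);

        if (use_pse)
                ptes = ((end & (DEMO_PMD_SIZE - 1)) + DEMO_PMD_SIZE
                        + DEMO_PAGE_SIZE - 1) >> DEMO_PAGE_SHIFT;
        else
                ptes = (end + DEMO_PAGE_SIZE - 1) >> DEMO_PAGE_SHIFT;
        tables += DEMO_PAGE_ALIGN(ptes * 8);

        return tables + 2 * DEMO_PAGE_SIZE;     /* two extra pages for fixmap */
}

int main(void)
{
        printf("%lu KiB\n", table_space(1UL << 30, 1) >> 10);   /* map 1 GiB */
        return 0;
}
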
768
769unsigned long __init_refok init_memory_mapping(unsigned long start,
770 unsigned long end)
771{
772 pgd_t *pgd_base = swapper_pg_dir;
773 unsigned long start_pfn, end_pfn;
774 unsigned long big_page_start;
775
776 /*
777 * Find space for the kernel direct mapping tables.
778 */
779 if (!after_init_bootmem)
780 find_early_table_space(end);
781
521#ifdef CONFIG_X86_PAE 782#ifdef CONFIG_X86_PAE
522 set_nx(); 783 set_nx();
523 if (nx_enabled) 784 if (nx_enabled)
524 printk(KERN_INFO "NX (Execute Disable) protection: active\n"); 785 printk(KERN_INFO "NX (Execute Disable) protection: active\n");
525#endif 786#endif
526 pagetable_init(); 787
788 /* Enable PSE if available */
789 if (cpu_has_pse)
790 set_in_cr4(X86_CR4_PSE);
791
792 /* Enable PGE if available */
793 if (cpu_has_pge) {
794 set_in_cr4(X86_CR4_PGE);
795 __supported_pte_mask |= _PAGE_GLOBAL;
796 }
797
798 /*
799 * Don't use a large page for the first 2/4MB of memory
800 * because there are often fixed size MTRRs in there
801 * and overlapping MTRRs into large pages can cause
802 * slowdowns.
803 */
804 big_page_start = PMD_SIZE;
805
806 if (start < big_page_start) {
807 start_pfn = start >> PAGE_SHIFT;
808 end_pfn = min(big_page_start>>PAGE_SHIFT, end>>PAGE_SHIFT);
809 } else {
 810 /* head is not big-page aligned? */
811 start_pfn = start >> PAGE_SHIFT;
812 end_pfn = ((start + (PMD_SIZE - 1))>>PMD_SHIFT)
813 << (PMD_SHIFT - PAGE_SHIFT);
814 }
815 if (start_pfn < end_pfn)
816 kernel_physical_mapping_init(pgd_base, start_pfn, end_pfn, 0);
817
818 /* big page range */
819 start_pfn = ((start + (PMD_SIZE - 1))>>PMD_SHIFT)
820 << (PMD_SHIFT - PAGE_SHIFT);
821 if (start_pfn < (big_page_start >> PAGE_SHIFT))
822 start_pfn = big_page_start >> PAGE_SHIFT;
823 end_pfn = (end>>PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT);
824 if (start_pfn < end_pfn)
825 kernel_physical_mapping_init(pgd_base, start_pfn, end_pfn,
826 cpu_has_pse);
827
 828 /* tail is not big-page aligned? */
829 start_pfn = end_pfn;
830 if (start_pfn > (big_page_start>>PAGE_SHIFT)) {
831 end_pfn = end >> PAGE_SHIFT;
832 if (start_pfn < end_pfn)
833 kernel_physical_mapping_init(pgd_base, start_pfn,
834 end_pfn, 0);
835 }
836
837 early_ioremap_page_table_range_init(pgd_base);
527 838
528 load_cr3(swapper_pg_dir); 839 load_cr3(swapper_pg_dir);
529 840
530 __flush_tlb_all(); 841 __flush_tlb_all();
531 842
843 if (!after_init_bootmem)
844 reserve_early(table_start << PAGE_SHIFT,
845 table_end << PAGE_SHIFT, "PGTABLE");
846
847 return end >> PAGE_SHIFT;
848}
849
850
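
init_memory_mapping() above maps the range in up to three pieces: a 4k head below the first PMD boundary (and below big_page_start), a large-page middle, and a 4k tail. A sketch of just the boundary arithmetic, with assumed shift values:

#include <stdio.h>

#define DEMO_PAGE_SHIFT 12
#define DEMO_PMD_SHIFT  21
#define DEMO_PMD_SIZE   (1UL << DEMO_PMD_SHIFT)

/* Split [start, end) (bytes) into the head/middle/tail PFN ranges that
 * init_memory_mapping() maps separately; only the PMD-aligned middle
 * is eligible for large pages. Empty sub-ranges come out start >= end. */
static void split_range(unsigned long start, unsigned long end)
{
        unsigned long pmd_pfns = DEMO_PMD_SIZE >> DEMO_PAGE_SHIFT;
        unsigned long head_end = ((start + DEMO_PMD_SIZE - 1) >> DEMO_PMD_SHIFT)
                                 * pmd_pfns;
        unsigned long mid_end = (end >> DEMO_PMD_SHIFT) * pmd_pfns;

        printf("head: %lx-%lx (4k)\n", start >> DEMO_PAGE_SHIFT, head_end);
        printf("mid : %lx-%lx (2M/4M)\n", head_end, mid_end);
        printf("tail: %lx-%lx (4k)\n", mid_end, end >> DEMO_PAGE_SHIFT);
}

int main(void)
{
        split_range(0x1000, 0x40003000);        /* unaligned at both ends */
        return 0;
}
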
851/*
852 * paging_init() sets up the page tables - note that the first 8MB are
853 * already mapped by head.S.
854 *
855 * This routines also unmaps the page at virtual kernel address 0, so
856 * that we can trap those pesky NULL-reference errors in the kernel.
857 */
858void __init paging_init(void)
859{
860 pagetable_init();
861
862 __flush_tlb_all();
863
532 kmap_init(); 864 kmap_init();
865
866 /*
867 * NOTE: at this point the bootmem allocator is fully available.
868 */
869 sparse_init();
870 zone_sizes_init();
871
872 paravirt_post_allocator_init();
533} 873}
534 874
535/* 875/*
@@ -564,24 +904,11 @@ static struct kcore_list kcore_mem, kcore_vmalloc;
564void __init mem_init(void) 904void __init mem_init(void)
565{ 905{
566 int codesize, reservedpages, datasize, initsize; 906 int codesize, reservedpages, datasize, initsize;
567 int tmp, bad_ppro; 907 int tmp;
568 908
569#ifdef CONFIG_FLATMEM 909#ifdef CONFIG_FLATMEM
570 BUG_ON(!mem_map); 910 BUG_ON(!mem_map);
571#endif 911#endif
572 bad_ppro = ppro_with_ram_bug();
573
574#ifdef CONFIG_HIGHMEM
575 /* check that fixmap and pkmap do not overlap */
576 if (PKMAP_BASE + LAST_PKMAP*PAGE_SIZE >= FIXADDR_START) {
577 printk(KERN_ERR
578 "fixmap and kmap areas overlap - this will crash\n");
579 printk(KERN_ERR "pkstart: %lxh pkend: %lxh fixstart %lxh\n",
580 PKMAP_BASE, PKMAP_BASE + LAST_PKMAP*PAGE_SIZE,
581 FIXADDR_START);
582 BUG();
583 }
584#endif
585 /* this will put all low memory onto the freelists */ 912 /* this will put all low memory onto the freelists */
586 totalram_pages += free_all_bootmem(); 913 totalram_pages += free_all_bootmem();
587 914
@@ -593,7 +920,7 @@ void __init mem_init(void)
593 if (page_is_ram(tmp) && PageReserved(pfn_to_page(tmp))) 920 if (page_is_ram(tmp) && PageReserved(pfn_to_page(tmp)))
594 reservedpages++; 921 reservedpages++;
595 922
596 set_highmem_pages_init(bad_ppro); 923 set_highmem_pages_init();
597 924
598 codesize = (unsigned long) &_etext - (unsigned long) &_text; 925 codesize = (unsigned long) &_etext - (unsigned long) &_text;
599 datasize = (unsigned long) &_edata - (unsigned long) &_etext; 926 datasize = (unsigned long) &_edata - (unsigned long) &_etext;
@@ -614,7 +941,6 @@ void __init mem_init(void)
614 (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10)) 941 (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10))
615 ); 942 );
616 943
617#if 1 /* double-sanity-check paranoia */
618 printk(KERN_INFO "virtual kernel memory layout:\n" 944 printk(KERN_INFO "virtual kernel memory layout:\n"
619 " fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n" 945 " fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n"
620#ifdef CONFIG_HIGHMEM 946#ifdef CONFIG_HIGHMEM
@@ -655,7 +981,6 @@ void __init mem_init(void)
655#endif 981#endif
656 BUG_ON(VMALLOC_START > VMALLOC_END); 982 BUG_ON(VMALLOC_START > VMALLOC_END);
657 BUG_ON((unsigned long)high_memory > VMALLOC_START); 983 BUG_ON((unsigned long)high_memory > VMALLOC_START);
658#endif /* double-sanity-check paranoia */
659 984
660 if (boot_cpu_data.wp_works_ok < 0) 985 if (boot_cpu_data.wp_works_ok < 0)
661 test_wp_bit(); 986 test_wp_bit();
@@ -784,3 +1109,9 @@ void free_initrd_mem(unsigned long start, unsigned long end)
784 free_init_pages("initrd memory", start, end); 1109 free_init_pages("initrd memory", start, end);
785} 1110}
786#endif 1111#endif
1112
1113int __init reserve_bootmem_generic(unsigned long phys, unsigned long len,
1114 int flags)
1115{
1116 return reserve_bootmem(phys, len, flags);
1117}
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 819dad973b13..122bcef222fc 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -18,6 +18,7 @@
18#include <linux/swap.h> 18#include <linux/swap.h>
19#include <linux/smp.h> 19#include <linux/smp.h>
20#include <linux/init.h> 20#include <linux/init.h>
21#include <linux/initrd.h>
21#include <linux/pagemap.h> 22#include <linux/pagemap.h>
22#include <linux/bootmem.h> 23#include <linux/bootmem.h>
23#include <linux/proc_fs.h> 24#include <linux/proc_fs.h>
@@ -47,6 +48,14 @@
47#include <asm/numa.h> 48#include <asm/numa.h>
48#include <asm/cacheflush.h> 49#include <asm/cacheflush.h>
49 50
51/*
52 * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries.
53 * The direct mapping extends to max_pfn_mapped, so that we can directly access
54 * apertures, ACPI and other tables without having to play with fixmaps.
55 */
56unsigned long max_low_pfn_mapped;
57unsigned long max_pfn_mapped;
58
50static unsigned long dma_reserve __initdata; 59static unsigned long dma_reserve __initdata;
51 60
52DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); 61DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
@@ -135,26 +144,17 @@ static __init void *spp_getpage(void)
135 return ptr; 144 return ptr;
136} 145}
137 146
138static __init void 147void
139set_pte_phys(unsigned long vaddr, unsigned long phys, pgprot_t prot) 148set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte)
140{ 149{
141 pgd_t *pgd;
142 pud_t *pud; 150 pud_t *pud;
143 pmd_t *pmd; 151 pmd_t *pmd;
144 pte_t *pte, new_pte; 152 pte_t *pte;
145
146 pr_debug("set_pte_phys %lx to %lx\n", vaddr, phys);
147 153
148 pgd = pgd_offset_k(vaddr); 154 pud = pud_page + pud_index(vaddr);
149 if (pgd_none(*pgd)) {
150 printk(KERN_ERR
151 "PGD FIXMAP MISSING, it should be setup in head.S!\n");
152 return;
153 }
154 pud = pud_offset(pgd, vaddr);
155 if (pud_none(*pud)) { 155 if (pud_none(*pud)) {
156 pmd = (pmd_t *) spp_getpage(); 156 pmd = (pmd_t *) spp_getpage();
157 set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE | _PAGE_USER)); 157 pud_populate(&init_mm, pud, pmd);
158 if (pmd != pmd_offset(pud, 0)) { 158 if (pmd != pmd_offset(pud, 0)) {
159 printk(KERN_ERR "PAGETABLE BUG #01! %p <-> %p\n", 159 printk(KERN_ERR "PAGETABLE BUG #01! %p <-> %p\n",
160 pmd, pmd_offset(pud, 0)); 160 pmd, pmd_offset(pud, 0));
@@ -164,13 +164,12 @@ set_pte_phys(unsigned long vaddr, unsigned long phys, pgprot_t prot)
164 pmd = pmd_offset(pud, vaddr); 164 pmd = pmd_offset(pud, vaddr);
165 if (pmd_none(*pmd)) { 165 if (pmd_none(*pmd)) {
166 pte = (pte_t *) spp_getpage(); 166 pte = (pte_t *) spp_getpage();
167 set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE | _PAGE_USER)); 167 pmd_populate_kernel(&init_mm, pmd, pte);
168 if (pte != pte_offset_kernel(pmd, 0)) { 168 if (pte != pte_offset_kernel(pmd, 0)) {
169 printk(KERN_ERR "PAGETABLE BUG #02!\n"); 169 printk(KERN_ERR "PAGETABLE BUG #02!\n");
170 return; 170 return;
171 } 171 }
172 } 172 }
173 new_pte = pfn_pte(phys >> PAGE_SHIFT, prot);
174 173
175 pte = pte_offset_kernel(pmd, vaddr); 174 pte = pte_offset_kernel(pmd, vaddr);
176 if (!pte_none(*pte) && pte_val(new_pte) && 175 if (!pte_none(*pte) && pte_val(new_pte) &&
@@ -185,6 +184,64 @@ set_pte_phys(unsigned long vaddr, unsigned long phys, pgprot_t prot)
185 __flush_tlb_one(vaddr); 184 __flush_tlb_one(vaddr);
186} 185}
187 186
187void
188set_pte_vaddr(unsigned long vaddr, pte_t pteval)
189{
190 pgd_t *pgd;
191 pud_t *pud_page;
192
193 pr_debug("set_pte_vaddr %lx to %lx\n", vaddr, native_pte_val(pteval));
194
195 pgd = pgd_offset_k(vaddr);
196 if (pgd_none(*pgd)) {
197 printk(KERN_ERR
198 "PGD FIXMAP MISSING, it should be setup in head.S!\n");
199 return;
200 }
201 pud_page = (pud_t*)pgd_page_vaddr(*pgd);
202 set_pte_vaddr_pud(pud_page, vaddr, pteval);
203}
204
205/*
206 * Create large page table mappings for a range of physical addresses.
207 */
208static void __init __init_extra_mapping(unsigned long phys, unsigned long size,
209 pgprot_t prot)
210{
211 pgd_t *pgd;
212 pud_t *pud;
213 pmd_t *pmd;
214
215 BUG_ON((phys & ~PMD_MASK) || (size & ~PMD_MASK));
216 for (; size; phys += PMD_SIZE, size -= PMD_SIZE) {
217 pgd = pgd_offset_k((unsigned long)__va(phys));
218 if (pgd_none(*pgd)) {
219 pud = (pud_t *) spp_getpage();
220 set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE |
221 _PAGE_USER));
222 }
223 pud = pud_offset(pgd, (unsigned long)__va(phys));
224 if (pud_none(*pud)) {
225 pmd = (pmd_t *) spp_getpage();
226 set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE |
227 _PAGE_USER));
228 }
229 pmd = pmd_offset(pud, phys);
230 BUG_ON(!pmd_none(*pmd));
231 set_pmd(pmd, __pmd(phys | pgprot_val(prot)));
232 }
233}
234
235void __init init_extra_mapping_wb(unsigned long phys, unsigned long size)
236{
237 __init_extra_mapping(phys, size, PAGE_KERNEL_LARGE);
238}
239
240void __init init_extra_mapping_uc(unsigned long phys, unsigned long size)
241{
242 __init_extra_mapping(phys, size, PAGE_KERNEL_LARGE_NOCACHE);
243}
244
188/* 245/*
189 * The head.S code sets up the kernel high mapping: 246 * The head.S code sets up the kernel high mapping:
190 * 247 *
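
__init_extra_mapping() in the hunk above refuses unaligned input with a BUG_ON() and then installs one large PMD entry per 2 MiB step. A minimal model of that loop; map_one_large_page() is a made-up stand-in for set_pmd():

#include <assert.h>
#include <stdio.h>

#define DEMO_PMD_SHIFT 21
#define DEMO_PMD_SIZE  (1UL << DEMO_PMD_SHIFT)
#define DEMO_PMD_MASK  (~(DEMO_PMD_SIZE - 1))

/* Stand-in for set_pmd(): just report the mapping. */
static void map_one_large_page(unsigned long phys)
{
        printf("map 2M page at %lx\n", phys);
}

/* Both ends must be PMD-aligned, exactly what the BUG_ON() in
 * __init_extra_mapping() enforces; then install one large entry
 * per 2 MiB step. */
static void extra_mapping(unsigned long phys, unsigned long size)
{
        assert(!(phys & ~DEMO_PMD_MASK) && !(size & ~DEMO_PMD_MASK));
        for (; size; phys += DEMO_PMD_SIZE, size -= DEMO_PMD_SIZE)
                map_one_large_page(phys);
}

int main(void)
{
        extra_mapping(0x40000000UL, 2 * DEMO_PMD_SIZE);
        return 0;
}
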
@@ -213,20 +270,9 @@ void __init cleanup_highmap(void)
213 } 270 }
214} 271}
215 272
216/* NOTE: this is meant to be run only at boot */
217void __init __set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
218{
219 unsigned long address = __fix_to_virt(idx);
220
221 if (idx >= __end_of_fixed_addresses) {
222 printk(KERN_ERR "Invalid __set_fixmap\n");
223 return;
224 }
225 set_pte_phys(address, phys, prot);
226}
227
228static unsigned long __initdata table_start; 273static unsigned long __initdata table_start;
229static unsigned long __meminitdata table_end; 274static unsigned long __meminitdata table_end;
275static unsigned long __meminitdata table_top;
230 276
231static __meminit void *alloc_low_page(unsigned long *phys) 277static __meminit void *alloc_low_page(unsigned long *phys)
232{ 278{
@@ -240,7 +286,7 @@ static __meminit void *alloc_low_page(unsigned long *phys)
240 return adr; 286 return adr;
241 } 287 }
242 288
243 if (pfn >= end_pfn) 289 if (pfn >= table_top)
244 panic("alloc_low_page: ran out of memory"); 290 panic("alloc_low_page: ran out of memory");
245 291
246 adr = early_ioremap(pfn * PAGE_SIZE, PAGE_SIZE); 292 adr = early_ioremap(pfn * PAGE_SIZE, PAGE_SIZE);
@@ -257,65 +303,61 @@ static __meminit void unmap_low_page(void *adr)
257 early_iounmap(adr, PAGE_SIZE); 303 early_iounmap(adr, PAGE_SIZE);
258} 304}
259 305
260/* Must run before zap_low_mappings */ 306static unsigned long __meminit
261__meminit void *early_ioremap(unsigned long addr, unsigned long size) 307phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end)
262{ 308{
263 pmd_t *pmd, *last_pmd; 309 unsigned pages = 0;
264 unsigned long vaddr; 310 unsigned long last_map_addr = end;
265 int i, pmds; 311 int i;
312
313 pte_t *pte = pte_page + pte_index(addr);
266 314
267 pmds = ((addr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE; 315 for(i = pte_index(addr); i < PTRS_PER_PTE; i++, addr += PAGE_SIZE, pte++) {
268 vaddr = __START_KERNEL_map;
269 pmd = level2_kernel_pgt;
270 last_pmd = level2_kernel_pgt + PTRS_PER_PMD - 1;
271 316
272 for (; pmd <= last_pmd; pmd++, vaddr += PMD_SIZE) { 317 if (addr >= end) {
273 for (i = 0; i < pmds; i++) { 318 if (!after_bootmem) {
274 if (pmd_present(pmd[i])) 319 for(; i < PTRS_PER_PTE; i++, pte++)
275 goto continue_outer_loop; 320 set_pte(pte, __pte(0));
321 }
322 break;
276 } 323 }
277 vaddr += addr & ~PMD_MASK;
278 addr &= PMD_MASK;
279 324
280 for (i = 0; i < pmds; i++, addr += PMD_SIZE) 325 if (pte_val(*pte))
281 set_pmd(pmd+i, __pmd(addr | __PAGE_KERNEL_LARGE_EXEC)); 326 continue;
282 __flush_tlb_all();
283 327
284 return (void *)vaddr; 328 if (0)
285continue_outer_loop: 329 printk(" pte=%p addr=%lx pte=%016lx\n",
286 ; 330 pte, addr, pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL).pte);
331 set_pte(pte, pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL));
332 last_map_addr = (addr & PAGE_MASK) + PAGE_SIZE;
333 pages++;
287 } 334 }
288 printk(KERN_ERR "early_ioremap(0x%lx, %lu) failed\n", addr, size); 335 update_page_count(PG_LEVEL_4K, pages);
289 336
290 return NULL; 337 return last_map_addr;
291} 338}
292 339
293/* 340static unsigned long __meminit
294 * To avoid virtual aliases later: 341phys_pte_update(pmd_t *pmd, unsigned long address, unsigned long end)
295 */
296__meminit void early_iounmap(void *addr, unsigned long size)
297{ 342{
298 unsigned long vaddr; 343 pte_t *pte = (pte_t *)pmd_page_vaddr(*pmd);
299 pmd_t *pmd;
300 int i, pmds;
301
302 vaddr = (unsigned long)addr;
303 pmds = ((vaddr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE;
304 pmd = level2_kernel_pgt + pmd_index(vaddr);
305
306 for (i = 0; i < pmds; i++)
307 pmd_clear(pmd + i);
308 344
309 __flush_tlb_all(); 345 return phys_pte_init(pte, address, end);
310} 346}
311 347
312static unsigned long __meminit 348static unsigned long __meminit
313phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end) 349phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
350 unsigned long page_size_mask)
314{ 351{
352 unsigned long pages = 0;
353 unsigned long last_map_addr = end;
354
315 int i = pmd_index(address); 355 int i = pmd_index(address);
316 356
317 for (; i < PTRS_PER_PMD; i++, address += PMD_SIZE) { 357 for (; i < PTRS_PER_PMD; i++, address += PMD_SIZE) {
358 unsigned long pte_phys;
318 pmd_t *pmd = pmd_page + pmd_index(address); 359 pmd_t *pmd = pmd_page + pmd_index(address);
360 pte_t *pte;
319 361
320 if (address >= end) { 362 if (address >= end) {
321 if (!after_bootmem) { 363 if (!after_bootmem) {
@@ -325,31 +367,50 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end)
325 break; 367 break;
326 } 368 }
327 369
328 if (pmd_val(*pmd)) 370 if (pmd_val(*pmd)) {
371 if (!pmd_large(*pmd))
372 last_map_addr = phys_pte_update(pmd, address,
373 end);
329 continue; 374 continue;
375 }
330 376
331 set_pte((pte_t *)pmd, 377 if (page_size_mask & (1<<PG_LEVEL_2M)) {
332 pfn_pte(address >> PAGE_SHIFT, PAGE_KERNEL_LARGE)); 378 pages++;
379 set_pte((pte_t *)pmd,
380 pfn_pte(address >> PAGE_SHIFT, PAGE_KERNEL_LARGE));
381 last_map_addr = (address & PMD_MASK) + PMD_SIZE;
382 continue;
383 }
384
385 pte = alloc_low_page(&pte_phys);
386 last_map_addr = phys_pte_init(pte, address, end);
387 unmap_low_page(pte);
388
389 pmd_populate_kernel(&init_mm, pmd, __va(pte_phys));
333 } 390 }
334 return address; 391 update_page_count(PG_LEVEL_2M, pages);
392 return last_map_addr;
335} 393}
336 394
337static unsigned long __meminit 395static unsigned long __meminit
338phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end) 396phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end,
397 unsigned long page_size_mask)
339{ 398{
340 pmd_t *pmd = pmd_offset(pud, 0); 399 pmd_t *pmd = pmd_offset(pud, 0);
341 unsigned long last_map_addr; 400 unsigned long last_map_addr;
342 401
343 spin_lock(&init_mm.page_table_lock); 402 spin_lock(&init_mm.page_table_lock);
344 last_map_addr = phys_pmd_init(pmd, address, end); 403 last_map_addr = phys_pmd_init(pmd, address, end, page_size_mask);
345 spin_unlock(&init_mm.page_table_lock); 404 spin_unlock(&init_mm.page_table_lock);
346 __flush_tlb_all(); 405 __flush_tlb_all();
347 return last_map_addr; 406 return last_map_addr;
348} 407}
349 408
350static unsigned long __meminit 409static unsigned long __meminit
351phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end) 410phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
411 unsigned long page_size_mask)
352{ 412{
413 unsigned long pages = 0;
353 unsigned long last_map_addr = end; 414 unsigned long last_map_addr = end;
354 int i = pud_index(addr); 415 int i = pud_index(addr);
355 416
@@ -369,11 +430,13 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end)
369 430
370 if (pud_val(*pud)) { 431 if (pud_val(*pud)) {
371 if (!pud_large(*pud)) 432 if (!pud_large(*pud))
372 last_map_addr = phys_pmd_update(pud, addr, end); 433 last_map_addr = phys_pmd_update(pud, addr, end,
434 page_size_mask);
373 continue; 435 continue;
374 } 436 }
375 437
376 if (direct_gbpages) { 438 if (page_size_mask & (1<<PG_LEVEL_1G)) {
439 pages++;
377 set_pte((pte_t *)pud, 440 set_pte((pte_t *)pud,
378 pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL_LARGE)); 441 pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL_LARGE));
379 last_map_addr = (addr & PUD_MASK) + PUD_SIZE; 442 last_map_addr = (addr & PUD_MASK) + PUD_SIZE;
@@ -383,27 +446,50 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end)
383 pmd = alloc_low_page(&pmd_phys); 446 pmd = alloc_low_page(&pmd_phys);
384 447
385 spin_lock(&init_mm.page_table_lock); 448 spin_lock(&init_mm.page_table_lock);
386 set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE)); 449 last_map_addr = phys_pmd_init(pmd, addr, end, page_size_mask);
387 last_map_addr = phys_pmd_init(pmd, addr, end); 450 unmap_low_page(pmd);
451 pud_populate(&init_mm, pud, __va(pmd_phys));
388 spin_unlock(&init_mm.page_table_lock); 452 spin_unlock(&init_mm.page_table_lock);
389 453
390 unmap_low_page(pmd);
391 } 454 }
392 __flush_tlb_all(); 455 __flush_tlb_all();
456 update_page_count(PG_LEVEL_1G, pages);
393 457
394 return last_map_addr >> PAGE_SHIFT; 458 return last_map_addr;
459}
460
461static unsigned long __meminit
462phys_pud_update(pgd_t *pgd, unsigned long addr, unsigned long end,
463 unsigned long page_size_mask)
464{
465 pud_t *pud;
466
467 pud = (pud_t *)pgd_page_vaddr(*pgd);
468
469 return phys_pud_init(pud, addr, end, page_size_mask);
395} 470}
396 471
397static void __init find_early_table_space(unsigned long end) 472static void __init find_early_table_space(unsigned long end)
398{ 473{
399 unsigned long puds, pmds, tables, start; 474 unsigned long puds, pmds, ptes, tables, start;
400 475
401 puds = (end + PUD_SIZE - 1) >> PUD_SHIFT; 476 puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
402 tables = round_up(puds * sizeof(pud_t), PAGE_SIZE); 477 tables = round_up(puds * sizeof(pud_t), PAGE_SIZE);
403 if (!direct_gbpages) { 478 if (direct_gbpages) {
479 unsigned long extra;
480 extra = end - ((end>>PUD_SHIFT) << PUD_SHIFT);
481 pmds = (extra + PMD_SIZE - 1) >> PMD_SHIFT;
482 } else
404 pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT; 483 pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
405 tables += round_up(pmds * sizeof(pmd_t), PAGE_SIZE); 484 tables += round_up(pmds * sizeof(pmd_t), PAGE_SIZE);
406 } 485
486 if (cpu_has_pse) {
487 unsigned long extra;
488 extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT);
489 ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT;
490 } else
491 ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT;
492 tables += round_up(ptes * sizeof(pte_t), PAGE_SIZE);
407 493
408 /* 494 /*
409 * RED-PEN putting page tables only on node 0 could 495 * RED-PEN putting page tables only on node 0 could
@@ -417,10 +503,10 @@ static void __init find_early_table_space(unsigned long end)
417 503
418 table_start >>= PAGE_SHIFT; 504 table_start >>= PAGE_SHIFT;
419 table_end = table_start; 505 table_end = table_start;
506 table_top = table_start + (tables >> PAGE_SHIFT);
420 507
421 early_printk("kernel direct mapping tables up to %lx @ %lx-%lx\n", 508 printk(KERN_DEBUG "kernel direct mapping tables up to %lx @ %lx-%lx\n",
422 end, table_start << PAGE_SHIFT, 509 end, table_start << PAGE_SHIFT, table_top << PAGE_SHIFT);
423 (table_start << PAGE_SHIFT) + tables);
424} 510}
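
Worked numbers for the new estimate: take end = 4 GiB, gbpages off, PSE on, and assume 8-byte table entries (standalone, compilable):

	#include <stdio.h>

	#define PAGE_SHIFT 12
	#define PMD_SHIFT  21
	#define PUD_SHIFT  30
	#define PAGE_SIZE  (1ULL << PAGE_SHIFT)
	#define round_up(x, y) (((x) + (y) - 1) & ~((y) - 1))

	int main(void)
	{
		unsigned long long end = 1ULL << 32;	/* 4 GiB */
		unsigned long long puds, pmds, extra, ptes, tables;

		puds = (end + (1ULL << PUD_SHIFT) - 1) >> PUD_SHIFT;	/* 4 */
		tables = round_up(puds * 8, PAGE_SIZE);			/* 1 page */
		pmds = (end + (1ULL << PMD_SHIFT) - 1) >> PMD_SHIFT;	/* 2048 */
		tables += round_up(pmds * 8, PAGE_SIZE);		/* 4 pages */
		/* with PSE only the sub-2M tail needs ptes; 4 GiB is aligned */
		extra = end - ((end >> PMD_SHIFT) << PMD_SHIFT);	/* 0 */
		ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT;		/* 0 */
		tables += round_up(ptes * 8, PAGE_SIZE);		/* 0 */

		/* 20480 bytes, 5 pages */
		printf("%llu bytes (%llu pages)\n", tables, tables >> PAGE_SHIFT);
		return 0;
	}
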
425 511
426static void __init init_gbpages(void) 512static void __init init_gbpages(void)
@@ -431,7 +517,7 @@ static void __init init_gbpages(void)
431 direct_gbpages = 0; 517 direct_gbpages = 0;
432} 518}
433 519
434#ifdef CONFIG_MEMTEST_BOOTPARAM 520#ifdef CONFIG_MEMTEST
435 521
436static void __init memtest(unsigned long start_phys, unsigned long size, 522static void __init memtest(unsigned long start_phys, unsigned long size,
437 unsigned pattern) 523 unsigned pattern)
@@ -493,7 +579,8 @@ static void __init memtest(unsigned long start_phys, unsigned long size,
493 579
494} 580}
495 581
496static int memtest_pattern __initdata = CONFIG_MEMTEST_BOOTPARAM_VALUE; 582/* default is disabled */
583static int memtest_pattern __initdata;
497 584
498static int __init parse_memtest(char *arg) 585static int __init parse_memtest(char *arg)
499{ 586{
@@ -542,15 +629,85 @@ static void __init early_memtest(unsigned long start, unsigned long end)
542} 629}
543#endif 630#endif
544 631
632static unsigned long __init kernel_physical_mapping_init(unsigned long start,
633 unsigned long end,
634 unsigned long page_size_mask)
635{
636
637 unsigned long next, last_map_addr = end;
638
639 start = (unsigned long)__va(start);
640 end = (unsigned long)__va(end);
641
642 for (; start < end; start = next) {
643 pgd_t *pgd = pgd_offset_k(start);
644 unsigned long pud_phys;
645 pud_t *pud;
646
647 next = start + PGDIR_SIZE;
648 if (next > end)
649 next = end;
650
651 if (pgd_val(*pgd)) {
652 last_map_addr = phys_pud_update(pgd, __pa(start),
653 __pa(end), page_size_mask);
654 continue;
655 }
656
657 if (after_bootmem)
658 pud = pud_offset(pgd, start & PGDIR_MASK);
659 else
660 pud = alloc_low_page(&pud_phys);
661
662 last_map_addr = phys_pud_init(pud, __pa(start), __pa(next),
663 page_size_mask);
664 unmap_low_page(pud);
665 pgd_populate(&init_mm, pgd_offset_k(start),
666 __va(pud_phys));
667 }
668
669 return last_map_addr;
670}
671
672struct map_range {
673 unsigned long start;
674 unsigned long end;
675 unsigned page_size_mask;
676};
677
678#define NR_RANGE_MR 5
679
680static int save_mr(struct map_range *mr, int nr_range,
681 unsigned long start_pfn, unsigned long end_pfn,
682 unsigned long page_size_mask)
683{
684
685 if (start_pfn < end_pfn) {
686 if (nr_range >= NR_RANGE_MR)
687 panic("run out of range for init_memory_mapping\n");
688 mr[nr_range].start = start_pfn<<PAGE_SHIFT;
689 mr[nr_range].end = end_pfn<<PAGE_SHIFT;
690 mr[nr_range].page_size_mask = page_size_mask;
691 nr_range++;
692 }
693
694 return nr_range;
695}
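
save_mr() is small enough to restate outside the kernel; the point is that empty pfn ranges are dropped, so the five candidate ranges built below in init_memory_mapping() can collapse to fewer entries. A sketch with illustrative inputs (the overflow panic is omitted, and 1 << 2 standing in for the 2M mask is an assumption):

	#include <stdio.h>

	#define PAGE_SHIFT 12
	#define NR_RANGE_MR 5

	struct map_range {
		unsigned long start, end;
		unsigned page_size_mask;
	};

	static int save_mr(struct map_range *mr, int nr_range,
			   unsigned long start_pfn, unsigned long end_pfn,
			   unsigned long page_size_mask)
	{
		if (start_pfn < end_pfn) {	/* only record non-empty ranges */
			mr[nr_range].start = start_pfn << PAGE_SHIFT;
			mr[nr_range].end = end_pfn << PAGE_SHIFT;
			mr[nr_range].page_size_mask = page_size_mask;
			nr_range++;
		}
		return nr_range;
	}

	int main(void)
	{
		struct map_range mr[NR_RANGE_MR];
		int n = 0;

		n = save_mr(mr, n, 0, 0, 0);		/* empty head: dropped */
		n = save_mr(mr, n, 0, 0x200, 1 << 2);	/* 2 MB worth of pfns */
		printf("%d range(s), first %lx-%lx\n", n, mr[0].start, mr[0].end);
		return 0;
	}
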
696
545/* 697/*
 546 * Set up the direct mapping of the physical memory at PAGE_OFFSET. 698
547 * This runs before bootmem is initialized and gets pages directly from 699 * This runs before bootmem is initialized and gets pages directly from
548 * the physical memory. To access them they are temporarily mapped. 700 * the physical memory. To access them they are temporarily mapped.
549 */ 701 */
550unsigned long __init_refok init_memory_mapping(unsigned long start, unsigned long end) 702unsigned long __init_refok init_memory_mapping(unsigned long start,
703 unsigned long end)
551{ 704{
552 unsigned long next, last_map_addr = end; 705 unsigned long last_map_addr = 0;
553 unsigned long start_phys = start, end_phys = end; 706 unsigned long page_size_mask = 0;
707 unsigned long start_pfn, end_pfn;
708
709 struct map_range mr[NR_RANGE_MR];
710 int nr_range, i;
554 711
555 printk(KERN_INFO "init_memory_mapping\n"); 712 printk(KERN_INFO "init_memory_mapping\n");
556 713
@@ -561,48 +718,101 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, unsigned lon
561 * memory mapped. Unfortunately this is done currently before the 718 * memory mapped. Unfortunately this is done currently before the
562 * nodes are discovered. 719 * nodes are discovered.
563 */ 720 */
564 if (!after_bootmem) { 721 if (!after_bootmem)
565 init_gbpages(); 722 init_gbpages();
566 find_early_table_space(end);
567 }
568 723
569 start = (unsigned long)__va(start); 724 if (direct_gbpages)
570 end = (unsigned long)__va(end); 725 page_size_mask |= 1 << PG_LEVEL_1G;
726 if (cpu_has_pse)
727 page_size_mask |= 1 << PG_LEVEL_2M;
728
729 memset(mr, 0, sizeof(mr));
730 nr_range = 0;
731
 732 /* head: sub-2M piece before the first big-page boundary */
733 start_pfn = start >> PAGE_SHIFT;
734 end_pfn = ((start + (PMD_SIZE - 1)) >> PMD_SHIFT)
735 << (PMD_SHIFT - PAGE_SHIFT);
736 nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0);
737
 738 /* big page (2M) range */
739 start_pfn = ((start + (PMD_SIZE - 1))>>PMD_SHIFT)
740 << (PMD_SHIFT - PAGE_SHIFT);
741 end_pfn = ((start + (PUD_SIZE - 1))>>PUD_SHIFT)
742 << (PUD_SHIFT - PAGE_SHIFT);
743 if (end_pfn > ((end>>PUD_SHIFT)<<(PUD_SHIFT - PAGE_SHIFT)))
744 end_pfn = ((end>>PUD_SHIFT)<<(PUD_SHIFT - PAGE_SHIFT));
745 nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
746 page_size_mask & (1<<PG_LEVEL_2M));
747
748 /* big page (1G) range */
749 start_pfn = end_pfn;
750 end_pfn = (end>>PUD_SHIFT) << (PUD_SHIFT - PAGE_SHIFT);
751 nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
752 page_size_mask &
753 ((1<<PG_LEVEL_2M)|(1<<PG_LEVEL_1G)));
754
 755 /* tail below the last 1G boundary, still mappable with 2M pages */
756 start_pfn = end_pfn;
757 end_pfn = (end>>PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT);
758 nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
759 page_size_mask & (1<<PG_LEVEL_2M));
760
 761 /* tail below the last 2M boundary, mapped with 4k pages */
762 start_pfn = end_pfn;
763 end_pfn = end>>PAGE_SHIFT;
764 nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0);
765
766 for (i = 0; i < nr_range; i++)
767 printk(KERN_DEBUG " %010lx - %010lx page %s\n",
768 mr[i].start, mr[i].end,
769 (mr[i].page_size_mask & (1<<PG_LEVEL_1G))?"1G":(
770 (mr[i].page_size_mask & (1<<PG_LEVEL_2M))?"2M":"4k"));
571 771
572 for (; start < end; start = next) { 772 if (!after_bootmem)
573 pgd_t *pgd = pgd_offset_k(start); 773 find_early_table_space(end);
574 unsigned long pud_phys;
575 pud_t *pud;
576
577 if (after_bootmem)
578 pud = pud_offset(pgd, start & PGDIR_MASK);
579 else
580 pud = alloc_low_page(&pud_phys);
581 774
582 next = start + PGDIR_SIZE; 775 for (i = 0; i < nr_range; i++)
583 if (next > end) 776 last_map_addr = kernel_physical_mapping_init(
584 next = end; 777 mr[i].start, mr[i].end,
585 last_map_addr = phys_pud_init(pud, __pa(start), __pa(next)); 778 mr[i].page_size_mask);
586 if (!after_bootmem)
587 set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys));
588 unmap_low_page(pud);
589 }
590 779
591 if (!after_bootmem) 780 if (!after_bootmem)
592 mmu_cr4_features = read_cr4(); 781 mmu_cr4_features = read_cr4();
593 __flush_tlb_all(); 782 __flush_tlb_all();
594 783
595 if (!after_bootmem) 784 if (!after_bootmem && table_end > table_start)
596 reserve_early(table_start << PAGE_SHIFT, 785 reserve_early(table_start << PAGE_SHIFT,
597 table_end << PAGE_SHIFT, "PGTABLE"); 786 table_end << PAGE_SHIFT, "PGTABLE");
598 787
788 printk(KERN_INFO "last_map_addr: %lx end: %lx\n",
789 last_map_addr, end);
790
599 if (!after_bootmem) 791 if (!after_bootmem)
600 early_memtest(start_phys, end_phys); 792 early_memtest(start, end);
601 793
602 return last_map_addr; 794 return last_map_addr >> PAGE_SHIFT;
603} 795}
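
For a concrete feel of the split above, take an assumed init_memory_mapping(0, 0x7ff00000) (2047 MB) with PSE set and gbpages clear. Working the shift arithmetic through gives:

	head  (4k):  empty                    (start is already 2M aligned)
	2M  range:   empty                    (no 1G-aligned middle: end < 1 GiB)
	1G  range:   empty                    (ditto)
	tail  (2M):  0x00000000 - 0x7fe00000  mapped with 2M pages
	tail  (4k):  0x7fe00000 - 0x7ff00000  mapped with 4k pages

save_mr() drops the empty candidates, so kernel_physical_mapping_init() runs over just the last two ranges.
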
604 796
605#ifndef CONFIG_NUMA 797#ifndef CONFIG_NUMA
798void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn)
799{
800 unsigned long bootmap_size, bootmap;
801
802 bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT;
803 bootmap = find_e820_area(0, end_pfn<<PAGE_SHIFT, bootmap_size,
804 PAGE_SIZE);
805 if (bootmap == -1L)
806 panic("Cannot find bootmem map of size %ld\n", bootmap_size);
807 /* don't touch min_low_pfn */
808 bootmap_size = init_bootmem_node(NODE_DATA(0), bootmap >> PAGE_SHIFT,
809 0, end_pfn);
810 e820_register_active_regions(0, start_pfn, end_pfn);
811 free_bootmem_with_active_regions(0, end_pfn);
812 early_res_to_bootmem(0, end_pfn<<PAGE_SHIFT);
813 reserve_bootmem(bootmap, bootmap_size, BOOTMEM_DEFAULT);
814}
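
Back-of-envelope for the bootmap sized above: bootmem keeps one bit per page, so bootmem_bootmap_pages() comes out roughly as this simplified standalone sketch shows, here for 4 GiB of RAM:

	#include <stdio.h>

	#define PAGE_SHIFT 12
	#define PAGE_SIZE  (1UL << PAGE_SHIFT)

	int main(void)
	{
		unsigned long end_pfn = 1UL << 20;		/* 4 GiB in 4k pages */
		unsigned long bytes = (end_pfn + 7) / 8;	/* one bit per page */
		unsigned long map_pages = (bytes + PAGE_SIZE - 1) >> PAGE_SHIFT;

		/* 131072 bytes, 32 pages */
		printf("bootmap: %lu bytes, %lu pages\n", bytes, map_pages);
		return 0;
	}
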
815
606void __init paging_init(void) 816void __init paging_init(void)
607{ 817{
608 unsigned long max_zone_pfns[MAX_NR_ZONES]; 818 unsigned long max_zone_pfns[MAX_NR_ZONES];
@@ -610,9 +820,9 @@ void __init paging_init(void)
610 memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); 820 memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
611 max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN; 821 max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN;
612 max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN; 822 max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN;
613 max_zone_pfns[ZONE_NORMAL] = end_pfn; 823 max_zone_pfns[ZONE_NORMAL] = max_pfn;
614 824
615 memory_present(0, 0, end_pfn); 825 memory_present(0, 0, max_pfn);
616 sparse_init(); 826 sparse_init();
617 free_area_init_nodes(max_zone_pfns); 827 free_area_init_nodes(max_zone_pfns);
618} 828}
@@ -694,8 +904,8 @@ void __init mem_init(void)
694#else 904#else
695 totalram_pages = free_all_bootmem(); 905 totalram_pages = free_all_bootmem();
696#endif 906#endif
697 reservedpages = end_pfn - totalram_pages - 907 reservedpages = max_pfn - totalram_pages -
698 absent_pages_in_range(0, end_pfn); 908 absent_pages_in_range(0, max_pfn);
699 after_bootmem = 1; 909 after_bootmem = 1;
700 910
701 codesize = (unsigned long) &_etext - (unsigned long) &_text; 911 codesize = (unsigned long) &_etext - (unsigned long) &_text;
@@ -714,7 +924,7 @@ void __init mem_init(void)
714 printk(KERN_INFO "Memory: %luk/%luk available (%ldk kernel code, " 924 printk(KERN_INFO "Memory: %luk/%luk available (%ldk kernel code, "
715 "%ldk reserved, %ldk data, %ldk init)\n", 925 "%ldk reserved, %ldk data, %ldk init)\n",
716 (unsigned long) nr_free_pages() << (PAGE_SHIFT-10), 926 (unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
717 end_pfn << (PAGE_SHIFT-10), 927 max_pfn << (PAGE_SHIFT-10),
718 codesize >> 10, 928 codesize >> 10,
719 reservedpages << (PAGE_SHIFT-10), 929 reservedpages << (PAGE_SHIFT-10),
720 datasize >> 10, 930 datasize >> 10,
@@ -799,24 +1009,26 @@ void free_initrd_mem(unsigned long start, unsigned long end)
799} 1009}
800#endif 1010#endif
801 1011
802void __init reserve_bootmem_generic(unsigned long phys, unsigned len) 1012int __init reserve_bootmem_generic(unsigned long phys, unsigned long len,
1013 int flags)
803{ 1014{
804#ifdef CONFIG_NUMA 1015#ifdef CONFIG_NUMA
805 int nid, next_nid; 1016 int nid, next_nid;
1017 int ret;
806#endif 1018#endif
807 unsigned long pfn = phys >> PAGE_SHIFT; 1019 unsigned long pfn = phys >> PAGE_SHIFT;
808 1020
809 if (pfn >= end_pfn) { 1021 if (pfn >= max_pfn) {
810 /* 1022 /*
811 * This can happen with kdump kernels when accessing 1023 * This can happen with kdump kernels when accessing
812 * firmware tables: 1024 * firmware tables:
813 */ 1025 */
814 if (pfn < max_pfn_mapped) 1026 if (pfn < max_pfn_mapped)
815 return; 1027 return -EFAULT;
816 1028
817 printk(KERN_ERR "reserve_bootmem: illegal reserve %lx %u\n", 1029 printk(KERN_ERR "reserve_bootmem: illegal reserve %lx %lu\n",
818 phys, len); 1030 phys, len);
819 return; 1031 return -EFAULT;
820 } 1032 }
821 1033
822 /* Should check here against the e820 map to avoid double free */ 1034 /* Should check here against the e820 map to avoid double free */
@@ -824,9 +1036,13 @@ void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
824 nid = phys_to_nid(phys); 1036 nid = phys_to_nid(phys);
825 next_nid = phys_to_nid(phys + len - 1); 1037 next_nid = phys_to_nid(phys + len - 1);
826 if (nid == next_nid) 1038 if (nid == next_nid)
827 reserve_bootmem_node(NODE_DATA(nid), phys, len, BOOTMEM_DEFAULT); 1039 ret = reserve_bootmem_node(NODE_DATA(nid), phys, len, flags);
828 else 1040 else
829 reserve_bootmem(phys, len, BOOTMEM_DEFAULT); 1041 ret = reserve_bootmem(phys, len, flags);
1042
1043 if (ret != 0)
1044 return ret;
1045
830#else 1046#else
831 reserve_bootmem(phys, len, BOOTMEM_DEFAULT); 1047 reserve_bootmem(phys, len, BOOTMEM_DEFAULT);
832#endif 1048#endif
@@ -835,6 +1051,8 @@ void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
835 dma_reserve += len / PAGE_SIZE; 1051 dma_reserve += len / PAGE_SIZE;
836 set_dma_reserve(dma_reserve); 1052 set_dma_reserve(dma_reserve);
837 } 1053 }
1054
1055 return 0;
838} 1056}
839 1057
840int kern_addr_valid(unsigned long addr) 1058int kern_addr_valid(unsigned long addr)
@@ -939,7 +1157,7 @@ vmemmap_populate(struct page *start_page, unsigned long size, int node)
939 pmd_t *pmd; 1157 pmd_t *pmd;
940 1158
941 for (; addr < end; addr = next) { 1159 for (; addr < end; addr = next) {
942 next = pmd_addr_end(addr, end); 1160 void *p = NULL;
943 1161
944 pgd = vmemmap_pgd_populate(addr, node); 1162 pgd = vmemmap_pgd_populate(addr, node);
945 if (!pgd) 1163 if (!pgd)
@@ -949,33 +1167,51 @@ vmemmap_populate(struct page *start_page, unsigned long size, int node)
949 if (!pud) 1167 if (!pud)
950 return -ENOMEM; 1168 return -ENOMEM;
951 1169
952 pmd = pmd_offset(pud, addr); 1170 if (!cpu_has_pse) {
953 if (pmd_none(*pmd)) { 1171 next = (addr + PAGE_SIZE) & PAGE_MASK;
954 pte_t entry; 1172 pmd = vmemmap_pmd_populate(pud, addr, node);
955 void *p; 1173
1174 if (!pmd)
1175 return -ENOMEM;
1176
1177 p = vmemmap_pte_populate(pmd, addr, node);
956 1178
957 p = vmemmap_alloc_block(PMD_SIZE, node);
958 if (!p) 1179 if (!p)
959 return -ENOMEM; 1180 return -ENOMEM;
960 1181
961 entry = pfn_pte(__pa(p) >> PAGE_SHIFT, 1182 addr_end = addr + PAGE_SIZE;
962 PAGE_KERNEL_LARGE); 1183 p_end = p + PAGE_SIZE;
963 set_pmd(pmd, __pmd(pte_val(entry)));
964
965 /* check to see if we have contiguous blocks */
966 if (p_end != p || node_start != node) {
967 if (p_start)
968 printk(KERN_DEBUG " [%lx-%lx] PMD -> [%p-%p] on node %d\n",
969 addr_start, addr_end-1, p_start, p_end-1, node_start);
970 addr_start = addr;
971 node_start = node;
972 p_start = p;
973 }
974 addr_end = addr + PMD_SIZE;
975 p_end = p + PMD_SIZE;
976 } else { 1184 } else {
977 vmemmap_verify((pte_t *)pmd, node, addr, next); 1185 next = pmd_addr_end(addr, end);
1186
1187 pmd = pmd_offset(pud, addr);
1188 if (pmd_none(*pmd)) {
1189 pte_t entry;
1190
1191 p = vmemmap_alloc_block(PMD_SIZE, node);
1192 if (!p)
1193 return -ENOMEM;
1194
1195 entry = pfn_pte(__pa(p) >> PAGE_SHIFT,
1196 PAGE_KERNEL_LARGE);
1197 set_pmd(pmd, __pmd(pte_val(entry)));
1198
1199 /* check to see if we have contiguous blocks */
1200 if (p_end != p || node_start != node) {
1201 if (p_start)
1202 printk(KERN_DEBUG " [%lx-%lx] PMD -> [%p-%p] on node %d\n",
1203 addr_start, addr_end-1, p_start, p_end-1, node_start);
1204 addr_start = addr;
1205 node_start = node;
1206 p_start = p;
1207 }
1208
1209 addr_end = addr + PMD_SIZE;
1210 p_end = p + PMD_SIZE;
1211 } else
1212 vmemmap_verify((pte_t *)pmd, node, addr, next);
978 } 1213 }
1214
979 } 1215 }
980 return 0; 1216 return 0;
981} 1217}
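
The rewritten vmemmap_populate() now strides one page at a time when PSE is absent and a whole pmd when it is present. A standalone sketch of the two step computations, with a simplified pmd_addr_end() and an illustrative vmemmap address:

	#include <stdio.h>

	#define PAGE_SHIFT 12
	#define PMD_SHIFT  21
	#define PAGE_SIZE  (1ULL << PAGE_SHIFT)
	#define PAGE_MASK  (~(PAGE_SIZE - 1))
	#define PMD_SIZE   (1ULL << PMD_SHIFT)
	#define PMD_MASK   (~(PMD_SIZE - 1))

	/* simplified pmd_addr_end(): next pmd boundary, clamped to end */
	static unsigned long long pmd_addr_end(unsigned long long addr,
					       unsigned long long end)
	{
		unsigned long long boundary = (addr + PMD_SIZE) & PMD_MASK;
		return boundary < end ? boundary : end;
	}

	int main(void)
	{
		unsigned long long addr = 0xffffe20000100000ULL;	/* illustrative */
		unsigned long long end = addr + (4ULL << 20);

		printf("no-PSE step: %llx\n", (addr + PAGE_SIZE) & PAGE_MASK);
		printf("   PSE step: %llx\n", pmd_addr_end(addr, end));
		return 0;
	}
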
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index d1b867101e5f..115f13ee40c9 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -142,7 +142,7 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr,
142 /* 142 /*
143 * Don't remap the low PCI/ISA area, it's always mapped.. 143 * Don't remap the low PCI/ISA area, it's always mapped..
144 */ 144 */
145 if (phys_addr >= ISA_START_ADDRESS && last_addr < ISA_END_ADDRESS) 145 if (is_ISA_range(phys_addr, last_addr))
146 return (__force void __iomem *)phys_to_virt(phys_addr); 146 return (__force void __iomem *)phys_to_virt(phys_addr);
147 147
148 /* 148 /*
@@ -261,7 +261,7 @@ void __iomem *ioremap_nocache(resource_size_t phys_addr, unsigned long size)
261{ 261{
262 /* 262 /*
263 * Ideally, this should be: 263 * Ideally, this should be:
264 * pat_wc_enabled ? _PAGE_CACHE_UC : _PAGE_CACHE_UC_MINUS; 264 * pat_enabled ? _PAGE_CACHE_UC : _PAGE_CACHE_UC_MINUS;
265 * 265 *
266 * Till we fix all X drivers to use ioremap_wc(), we will use 266 * Till we fix all X drivers to use ioremap_wc(), we will use
267 * UC MINUS. 267 * UC MINUS.
@@ -285,7 +285,7 @@ EXPORT_SYMBOL(ioremap_nocache);
285 */ 285 */
286void __iomem *ioremap_wc(unsigned long phys_addr, unsigned long size) 286void __iomem *ioremap_wc(unsigned long phys_addr, unsigned long size)
287{ 287{
288 if (pat_wc_enabled) 288 if (pat_enabled)
289 return __ioremap_caller(phys_addr, size, _PAGE_CACHE_WC, 289 return __ioremap_caller(phys_addr, size, _PAGE_CACHE_WC,
290 __builtin_return_address(0)); 290 __builtin_return_address(0));
291 else 291 else
@@ -341,8 +341,8 @@ void iounmap(volatile void __iomem *addr)
341 * vm_area and by simply returning an address into the kernel mapping 341 * vm_area and by simply returning an address into the kernel mapping
342 * of ISA space. So handle that here. 342 * of ISA space. So handle that here.
343 */ 343 */
344 if (addr >= phys_to_virt(ISA_START_ADDRESS) && 344 if ((void __force *)addr >= phys_to_virt(ISA_START_ADDRESS) &&
345 addr < phys_to_virt(ISA_END_ADDRESS)) 345 (void __force *)addr < phys_to_virt(ISA_END_ADDRESS))
346 return; 346 return;
347 347
348 addr = (volatile void __iomem *) 348 addr = (volatile void __iomem *)
@@ -355,7 +355,7 @@ void iounmap(volatile void __iomem *addr)
355 cpa takes care of the direct mappings. */ 355 cpa takes care of the direct mappings. */
356 read_lock(&vmlist_lock); 356 read_lock(&vmlist_lock);
357 for (p = vmlist; p; p = p->next) { 357 for (p = vmlist; p; p = p->next) {
358 if (p->addr == addr) 358 if (p->addr == (void __force *)addr)
359 break; 359 break;
360 } 360 }
361 read_unlock(&vmlist_lock); 361 read_unlock(&vmlist_lock);
@@ -369,7 +369,7 @@ void iounmap(volatile void __iomem *addr)
369 free_memtype(p->phys_addr, p->phys_addr + get_vm_area_size(p)); 369 free_memtype(p->phys_addr, p->phys_addr + get_vm_area_size(p));
370 370
371 /* Finally remove it */ 371 /* Finally remove it */
372 o = remove_vm_area((void *)addr); 372 o = remove_vm_area((void __force *)addr);
373 BUG_ON(p != o || o == NULL); 373 BUG_ON(p != o || o == NULL);
374 kfree(p); 374 kfree(p);
375} 375}
@@ -388,7 +388,7 @@ void *xlate_dev_mem_ptr(unsigned long phys)
388 if (page_is_ram(start >> PAGE_SHIFT)) 388 if (page_is_ram(start >> PAGE_SHIFT))
389 return __va(phys); 389 return __va(phys);
390 390
391 addr = (void *)ioremap_default(start, PAGE_SIZE); 391 addr = (void __force *)ioremap_default(start, PAGE_SIZE);
392 if (addr) 392 if (addr)
393 addr = (void *)((unsigned long)addr | (phys & ~PAGE_MASK)); 393 addr = (void *)((unsigned long)addr | (phys & ~PAGE_MASK));
394 394
@@ -404,8 +404,6 @@ void unxlate_dev_mem_ptr(unsigned long phys, void *addr)
404 return; 404 return;
405} 405}
406 406
407#ifdef CONFIG_X86_32
408
409int __initdata early_ioremap_debug; 407int __initdata early_ioremap_debug;
410 408
411static int __init early_ioremap_debug_setup(char *str) 409static int __init early_ioremap_debug_setup(char *str)
@@ -417,8 +415,7 @@ static int __init early_ioremap_debug_setup(char *str)
417early_param("early_ioremap_debug", early_ioremap_debug_setup); 415early_param("early_ioremap_debug", early_ioremap_debug_setup);
418 416
419static __initdata int after_paging_init; 417static __initdata int after_paging_init;
420static pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)] 418static pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)] __page_aligned_bss;
421 __section(.bss.page_aligned);
422 419
423static inline pmd_t * __init early_ioremap_pmd(unsigned long addr) 420static inline pmd_t * __init early_ioremap_pmd(unsigned long addr)
424{ 421{
@@ -507,10 +504,11 @@ static void __init __early_set_fixmap(enum fixed_addresses idx,
507 return; 504 return;
508 } 505 }
509 pte = early_ioremap_pte(addr); 506 pte = early_ioremap_pte(addr);
507
510 if (pgprot_val(flags)) 508 if (pgprot_val(flags))
511 set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags)); 509 set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags));
512 else 510 else
513 pte_clear(NULL, addr, pte); 511 pte_clear(&init_mm, addr, pte);
514 __flush_tlb_one(addr); 512 __flush_tlb_one(addr);
515} 513}
516 514
@@ -648,5 +646,3 @@ void __this_fixmap_does_not_exist(void)
648{ 646{
649 WARN_ON(1); 647 WARN_ON(1);
650} 648}
651
652#endif /* CONFIG_X86_32 */
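
is_ISA_range(), adopted in the hunk above and again in the pat.c hunk below, presumably expands along the lines of the open-coded test it replaces (a sketch; the real definition lives in asm/e820.h):

	#define ISA_START_ADDRESS	0xa0000
	#define ISA_END_ADDRESS		0x100000
	/* inclusive start, exclusive end, mirroring the replaced check */
	#define is_ISA_range(s, e) \
		((s) >= ISA_START_ADDRESS && (e) < ISA_END_ADDRESS)
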
diff --git a/arch/x86/mm/k8topology_64.c b/arch/x86/mm/k8topology_64.c
index 1f476e477844..41f1b5c00a1d 100644
--- a/arch/x86/mm/k8topology_64.c
+++ b/arch/x86/mm/k8topology_64.c
@@ -22,6 +22,7 @@
22#include <asm/numa.h> 22#include <asm/numa.h>
23#include <asm/mpspec.h> 23#include <asm/mpspec.h>
24#include <asm/apic.h> 24#include <asm/apic.h>
25#include <asm/k8.h>
25 26
26static __init int find_northbridge(void) 27static __init int find_northbridge(void)
27{ 28{
@@ -56,34 +57,33 @@ static __init void early_get_boot_cpu_id(void)
56 /* 57 /*
57 * Find possible boot-time SMP configuration: 58 * Find possible boot-time SMP configuration:
58 */ 59 */
60#ifdef CONFIG_X86_MPPARSE
59 early_find_smp_config(); 61 early_find_smp_config();
62#endif
60#ifdef CONFIG_ACPI 63#ifdef CONFIG_ACPI
61 /* 64 /*
62 * Read APIC information from ACPI tables. 65 * Read APIC information from ACPI tables.
63 */ 66 */
64 early_acpi_boot_init(); 67 early_acpi_boot_init();
65#endif 68#endif
69#ifdef CONFIG_X86_MPPARSE
66 /* 70 /*
67 * get boot-time SMP configuration: 71 * get boot-time SMP configuration:
68 */ 72 */
69 if (smp_found_config) 73 if (smp_found_config)
70 early_get_smp_config(); 74 early_get_smp_config();
75#endif
71 early_init_lapic_mapping(); 76 early_init_lapic_mapping();
72} 77}
73 78
74int __init k8_scan_nodes(unsigned long start, unsigned long end) 79int __init k8_scan_nodes(unsigned long start, unsigned long end)
75{ 80{
81 unsigned numnodes, cores, bits, apicid_base;
76 unsigned long prevbase; 82 unsigned long prevbase;
77 struct bootnode nodes[8]; 83 struct bootnode nodes[8];
78 int nodeid, i, nb;
79 unsigned char nodeids[8]; 84 unsigned char nodeids[8];
80 int found = 0; 85 int i, j, nb, found = 0;
81 u32 reg; 86 u32 nodeid, reg;
82 unsigned numnodes;
83 unsigned cores;
84 unsigned bits;
85 int j;
86 unsigned apicid_base;
87 87
88 if (!early_pci_allowed()) 88 if (!early_pci_allowed())
89 return -1; 89 return -1;
@@ -105,7 +105,6 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
105 prevbase = 0; 105 prevbase = 0;
106 for (i = 0; i < 8; i++) { 106 for (i = 0; i < 8; i++) {
107 unsigned long base, limit; 107 unsigned long base, limit;
108 u32 nodeid;
109 108
110 base = read_pci_config(0, nb, 1, 0x40 + i*8); 109 base = read_pci_config(0, nb, 1, 0x40 + i*8);
111 limit = read_pci_config(0, nb, 1, 0x44 + i*8); 110 limit = read_pci_config(0, nb, 1, 0x44 + i*8);
@@ -144,8 +143,8 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
144 limit |= (1<<24)-1; 143 limit |= (1<<24)-1;
145 limit++; 144 limit++;
146 145
147 if (limit > end_pfn << PAGE_SHIFT) 146 if (limit > max_pfn << PAGE_SHIFT)
148 limit = end_pfn << PAGE_SHIFT; 147 limit = max_pfn << PAGE_SHIFT;
149 if (limit <= base) 148 if (limit <= base)
150 continue; 149 continue;
151 150
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index c5066d519e5d..b432d5781773 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -27,30 +27,17 @@
27struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; 27struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
28EXPORT_SYMBOL(node_data); 28EXPORT_SYMBOL(node_data);
29 29
30bootmem_data_t plat_node_bdata[MAX_NUMNODES]; 30static bootmem_data_t plat_node_bdata[MAX_NUMNODES];
31 31
32struct memnode memnode; 32struct memnode memnode;
33 33
34#ifdef CONFIG_SMP
35int x86_cpu_to_node_map_init[NR_CPUS] = {
36 [0 ... NR_CPUS-1] = NUMA_NO_NODE
37};
38void *x86_cpu_to_node_map_early_ptr;
39EXPORT_SYMBOL(x86_cpu_to_node_map_early_ptr);
40#endif
41DEFINE_PER_CPU(int, x86_cpu_to_node_map) = NUMA_NO_NODE;
42EXPORT_PER_CPU_SYMBOL(x86_cpu_to_node_map);
43
44s16 apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = { 34s16 apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
45 [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE 35 [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
46}; 36};
47 37
48cpumask_t node_to_cpumask_map[MAX_NUMNODES] __read_mostly;
49EXPORT_SYMBOL(node_to_cpumask_map);
50
51int numa_off __initdata; 38int numa_off __initdata;
52unsigned long __initdata nodemap_addr; 39static unsigned long __initdata nodemap_addr;
53unsigned long __initdata nodemap_size; 40static unsigned long __initdata nodemap_size;
54 41
55/* 42/*
56 * Given a shift value, try to populate memnodemap[] 43 * Given a shift value, try to populate memnodemap[]
@@ -99,7 +86,7 @@ static int __init allocate_cachealigned_memnodemap(void)
99 86
100 addr = 0x8000; 87 addr = 0x8000;
101 nodemap_size = round_up(sizeof(s16) * memnodemapsize, L1_CACHE_BYTES); 88 nodemap_size = round_up(sizeof(s16) * memnodemapsize, L1_CACHE_BYTES);
102 nodemap_addr = find_e820_area(addr, end_pfn<<PAGE_SHIFT, 89 nodemap_addr = find_e820_area(addr, max_pfn<<PAGE_SHIFT,
103 nodemap_size, L1_CACHE_BYTES); 90 nodemap_size, L1_CACHE_BYTES);
104 if (nodemap_addr == -1UL) { 91 if (nodemap_addr == -1UL) {
105 printk(KERN_ERR 92 printk(KERN_ERR
@@ -192,7 +179,7 @@ static void * __init early_node_mem(int nodeid, unsigned long start,
192void __init setup_node_bootmem(int nodeid, unsigned long start, 179void __init setup_node_bootmem(int nodeid, unsigned long start,
193 unsigned long end) 180 unsigned long end)
194{ 181{
195 unsigned long start_pfn, end_pfn, bootmap_pages, bootmap_size; 182 unsigned long start_pfn, last_pfn, bootmap_pages, bootmap_size;
196 unsigned long bootmap_start, nodedata_phys; 183 unsigned long bootmap_start, nodedata_phys;
197 void *bootmap; 184 void *bootmap;
198 const int pgdat_size = round_up(sizeof(pg_data_t), PAGE_SIZE); 185 const int pgdat_size = round_up(sizeof(pg_data_t), PAGE_SIZE);
@@ -204,7 +191,7 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
204 start, end); 191 start, end);
205 192
206 start_pfn = start >> PAGE_SHIFT; 193 start_pfn = start >> PAGE_SHIFT;
207 end_pfn = end >> PAGE_SHIFT; 194 last_pfn = end >> PAGE_SHIFT;
208 195
209 node_data[nodeid] = early_node_mem(nodeid, start, end, pgdat_size, 196 node_data[nodeid] = early_node_mem(nodeid, start, end, pgdat_size,
210 SMP_CACHE_BYTES); 197 SMP_CACHE_BYTES);
@@ -217,7 +204,7 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
217 memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t)); 204 memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t));
218 NODE_DATA(nodeid)->bdata = &plat_node_bdata[nodeid]; 205 NODE_DATA(nodeid)->bdata = &plat_node_bdata[nodeid];
219 NODE_DATA(nodeid)->node_start_pfn = start_pfn; 206 NODE_DATA(nodeid)->node_start_pfn = start_pfn;
220 NODE_DATA(nodeid)->node_spanned_pages = end_pfn - start_pfn; 207 NODE_DATA(nodeid)->node_spanned_pages = last_pfn - start_pfn;
221 208
222 /* 209 /*
223 * Find a place for the bootmem map 210 * Find a place for the bootmem map
@@ -226,14 +213,14 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
226 * early_node_mem will get that with find_e820_area instead 213 * early_node_mem will get that with find_e820_area instead
227 * of alloc_bootmem, that could clash with reserved range 214 * of alloc_bootmem, that could clash with reserved range
228 */ 215 */
229 bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn); 216 bootmap_pages = bootmem_bootmap_pages(last_pfn - start_pfn);
230 nid = phys_to_nid(nodedata_phys); 217 nid = phys_to_nid(nodedata_phys);
231 if (nid == nodeid) 218 if (nid == nodeid)
232 bootmap_start = round_up(nodedata_phys + pgdat_size, PAGE_SIZE); 219 bootmap_start = round_up(nodedata_phys + pgdat_size, PAGE_SIZE);
233 else 220 else
234 bootmap_start = round_up(start, PAGE_SIZE); 221 bootmap_start = round_up(start, PAGE_SIZE);
235 /* 222 /*
236 * SMP_CAHCE_BYTES could be enough, but init_bootmem_node like 223 * SMP_CACHE_BYTES could be enough, but init_bootmem_node like
237 * to use that to align to PAGE_SIZE 224 * to use that to align to PAGE_SIZE
238 */ 225 */
239 bootmap = early_node_mem(nodeid, bootmap_start, end, 226 bootmap = early_node_mem(nodeid, bootmap_start, end,
@@ -248,7 +235,7 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
248 235
249 bootmap_size = init_bootmem_node(NODE_DATA(nodeid), 236 bootmap_size = init_bootmem_node(NODE_DATA(nodeid),
250 bootmap_start >> PAGE_SHIFT, 237 bootmap_start >> PAGE_SHIFT,
251 start_pfn, end_pfn); 238 start_pfn, last_pfn);
252 239
253 printk(KERN_INFO " bootmap [%016lx - %016lx] pages %lx\n", 240 printk(KERN_INFO " bootmap [%016lx - %016lx] pages %lx\n",
254 bootmap_start, bootmap_start + bootmap_size - 1, 241 bootmap_start, bootmap_start + bootmap_size - 1,
@@ -309,7 +296,7 @@ void __init numa_init_array(void)
309 296
310#ifdef CONFIG_NUMA_EMU 297#ifdef CONFIG_NUMA_EMU
311/* Numa emulation */ 298/* Numa emulation */
312char *cmdline __initdata; 299static char *cmdline __initdata;
313 300
314/* 301/*
 315 * Sets up nid to range from addr to addr + size. If the end 302
@@ -413,15 +400,15 @@ static int __init split_nodes_by_size(struct bootnode *nodes, u64 *addr,
413} 400}
414 401
415/* 402/*
416 * Sets up the system RAM area from start_pfn to end_pfn according to the 403 * Sets up the system RAM area from start_pfn to last_pfn according to the
417 * numa=fake command-line option. 404 * numa=fake command-line option.
418 */ 405 */
419static struct bootnode nodes[MAX_NUMNODES] __initdata; 406static struct bootnode nodes[MAX_NUMNODES] __initdata;
420 407
421static int __init numa_emulation(unsigned long start_pfn, unsigned long end_pfn) 408static int __init numa_emulation(unsigned long start_pfn, unsigned long last_pfn)
422{ 409{
423 u64 size, addr = start_pfn << PAGE_SHIFT; 410 u64 size, addr = start_pfn << PAGE_SHIFT;
424 u64 max_addr = end_pfn << PAGE_SHIFT; 411 u64 max_addr = last_pfn << PAGE_SHIFT;
425 int num_nodes = 0, num = 0, coeff_flag, coeff = -1, i; 412 int num_nodes = 0, num = 0, coeff_flag, coeff = -1, i;
426 413
427 memset(&nodes, 0, sizeof(nodes)); 414 memset(&nodes, 0, sizeof(nodes));
@@ -527,7 +514,7 @@ out:
527} 514}
528#endif /* CONFIG_NUMA_EMU */ 515#endif /* CONFIG_NUMA_EMU */
529 516
530void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn) 517void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn)
531{ 518{
532 int i; 519 int i;
533 520
@@ -535,7 +522,7 @@ void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
535 nodes_clear(node_online_map); 522 nodes_clear(node_online_map);
536 523
537#ifdef CONFIG_NUMA_EMU 524#ifdef CONFIG_NUMA_EMU
538 if (cmdline && !numa_emulation(start_pfn, end_pfn)) 525 if (cmdline && !numa_emulation(start_pfn, last_pfn))
539 return; 526 return;
540 nodes_clear(node_possible_map); 527 nodes_clear(node_possible_map);
541 nodes_clear(node_online_map); 528 nodes_clear(node_online_map);
@@ -543,7 +530,7 @@ void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
543 530
544#ifdef CONFIG_ACPI_NUMA 531#ifdef CONFIG_ACPI_NUMA
545 if (!numa_off && !acpi_scan_nodes(start_pfn << PAGE_SHIFT, 532 if (!numa_off && !acpi_scan_nodes(start_pfn << PAGE_SHIFT,
546 end_pfn << PAGE_SHIFT)) 533 last_pfn << PAGE_SHIFT))
547 return; 534 return;
548 nodes_clear(node_possible_map); 535 nodes_clear(node_possible_map);
549 nodes_clear(node_online_map); 536 nodes_clear(node_online_map);
@@ -551,7 +538,7 @@ void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
551 538
552#ifdef CONFIG_K8_NUMA 539#ifdef CONFIG_K8_NUMA
553 if (!numa_off && !k8_scan_nodes(start_pfn<<PAGE_SHIFT, 540 if (!numa_off && !k8_scan_nodes(start_pfn<<PAGE_SHIFT,
554 end_pfn<<PAGE_SHIFT)) 541 last_pfn<<PAGE_SHIFT))
555 return; 542 return;
556 nodes_clear(node_possible_map); 543 nodes_clear(node_possible_map);
557 nodes_clear(node_online_map); 544 nodes_clear(node_online_map);
@@ -561,7 +548,7 @@ void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
561 548
562 printk(KERN_INFO "Faking a node at %016lx-%016lx\n", 549 printk(KERN_INFO "Faking a node at %016lx-%016lx\n",
563 start_pfn << PAGE_SHIFT, 550 start_pfn << PAGE_SHIFT,
564 end_pfn << PAGE_SHIFT); 551 last_pfn << PAGE_SHIFT);
565 /* setup dummy node covering all memory */ 552 /* setup dummy node covering all memory */
566 memnode_shift = 63; 553 memnode_shift = 63;
567 memnodemap = memnode.embedded_map; 554 memnodemap = memnode.embedded_map;
@@ -570,29 +557,8 @@ void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
570 node_set(0, node_possible_map); 557 node_set(0, node_possible_map);
571 for (i = 0; i < NR_CPUS; i++) 558 for (i = 0; i < NR_CPUS; i++)
572 numa_set_node(i, 0); 559 numa_set_node(i, 0);
573 /* cpumask_of_cpu() may not be available during early startup */ 560 e820_register_active_regions(0, start_pfn, last_pfn);
574 memset(&node_to_cpumask_map[0], 0, sizeof(node_to_cpumask_map[0])); 561 setup_node_bootmem(0, start_pfn << PAGE_SHIFT, last_pfn << PAGE_SHIFT);
575 cpu_set(0, node_to_cpumask_map[0]);
576 e820_register_active_regions(0, start_pfn, end_pfn);
577 setup_node_bootmem(0, start_pfn << PAGE_SHIFT, end_pfn << PAGE_SHIFT);
578}
579
580__cpuinit void numa_add_cpu(int cpu)
581{
582 set_bit(cpu,
583 (unsigned long *)&node_to_cpumask_map[early_cpu_to_node(cpu)]);
584}
585
586void __cpuinit numa_set_node(int cpu, int node)
587{
588 int *cpu_to_node_map = x86_cpu_to_node_map_early_ptr;
589
590 if(cpu_to_node_map)
591 cpu_to_node_map[cpu] = node;
592 else if(per_cpu_offset(cpu))
593 per_cpu(x86_cpu_to_node_map, cpu) = node;
594 else
595 Dprintk(KERN_INFO "Setting node for non-present cpu %d\n", cpu);
596} 562}
597 563
598unsigned long __init numa_free_all_bootmem(void) 564unsigned long __init numa_free_all_bootmem(void)
@@ -613,7 +579,7 @@ void __init paging_init(void)
613 memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); 579 memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
614 max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN; 580 max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN;
615 max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN; 581 max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN;
616 max_zone_pfns[ZONE_NORMAL] = end_pfn; 582 max_zone_pfns[ZONE_NORMAL] = max_pfn;
617 583
618 sparse_memory_present_with_active_regions(MAX_NUMNODES); 584 sparse_memory_present_with_active_regions(MAX_NUMNODES);
619 sparse_init(); 585 sparse_init();
@@ -641,6 +607,7 @@ static __init int numa_setup(char *opt)
641} 607}
642early_param("numa", numa_setup); 608early_param("numa", numa_setup);
643 609
610#ifdef CONFIG_NUMA
644/* 611/*
645 * Setup early cpu_to_node. 612 * Setup early cpu_to_node.
646 * 613 *
@@ -652,14 +619,19 @@ early_param("numa", numa_setup);
652 * is already initialized in a round robin manner at numa_init_array, 619 * is already initialized in a round robin manner at numa_init_array,
653 * prior to this call, and this initialization is good enough 620 * prior to this call, and this initialization is good enough
654 * for the fake NUMA cases. 621 * for the fake NUMA cases.
622 *
623 * Called before the per_cpu areas are setup.
655 */ 624 */
656void __init init_cpu_to_node(void) 625void __init init_cpu_to_node(void)
657{ 626{
658 int i; 627 int cpu;
628 u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);
659 629
660 for (i = 0; i < NR_CPUS; i++) { 630 BUG_ON(cpu_to_apicid == NULL);
631
632 for_each_possible_cpu(cpu) {
661 int node; 633 int node;
662 u16 apicid = x86_cpu_to_apicid_init[i]; 634 u16 apicid = cpu_to_apicid[cpu];
663 635
664 if (apicid == BAD_APICID) 636 if (apicid == BAD_APICID)
665 continue; 637 continue;
@@ -668,8 +640,9 @@ void __init init_cpu_to_node(void)
668 continue; 640 continue;
669 if (!node_online(node)) 641 if (!node_online(node))
670 continue; 642 continue;
671 numa_set_node(i, node); 643 numa_set_node(cpu, node);
672 } 644 }
673} 645}
646#endif
674 647
675 648
diff --git a/arch/x86/mm/pageattr-test.c b/arch/x86/mm/pageattr-test.c
index 75f1b109aae8..0dcd42eb94e6 100644
--- a/arch/x86/mm/pageattr-test.c
+++ b/arch/x86/mm/pageattr-test.c
@@ -1,8 +1,8 @@
1/* 1/*
2 * self test for change_page_attr. 2 * self test for change_page_attr.
3 * 3 *
 4 * Clears the global bit on random pages in the direct mapping, then reverts 4 * Clears a test pte bit on random pages in the direct mapping,
5 * and compares page tables forwards and afterwards. 5 * then reverts and compares page tables forwards and afterwards.
6 */ 6 */
7#include <linux/bootmem.h> 7#include <linux/bootmem.h>
8#include <linux/kthread.h> 8#include <linux/kthread.h>
@@ -32,6 +32,13 @@ enum {
32 GPS = (1<<30) 32 GPS = (1<<30)
33}; 33};
34 34
35#define PAGE_TESTBIT __pgprot(_PAGE_UNUSED1)
36
37static int pte_testbit(pte_t pte)
38{
39 return pte_flags(pte) & _PAGE_UNUSED1;
40}
41
35struct split_state { 42struct split_state {
36 long lpg, gpg, spg, exec; 43 long lpg, gpg, spg, exec;
37 long min_exec, max_exec; 44 long min_exec, max_exec;
@@ -165,15 +172,14 @@ static int pageattr_test(void)
165 continue; 172 continue;
166 } 173 }
167 174
168 err = change_page_attr_clear(addr[i], len[i], 175 err = change_page_attr_set(addr[i], len[i], PAGE_TESTBIT);
169 __pgprot(_PAGE_GLOBAL));
170 if (err < 0) { 176 if (err < 0) {
171 printk(KERN_ERR "CPA %d failed %d\n", i, err); 177 printk(KERN_ERR "CPA %d failed %d\n", i, err);
172 failed++; 178 failed++;
173 } 179 }
174 180
175 pte = lookup_address(addr[i], &level); 181 pte = lookup_address(addr[i], &level);
176 if (!pte || pte_global(*pte) || pte_huge(*pte)) { 182 if (!pte || !pte_testbit(*pte) || pte_huge(*pte)) {
177 printk(KERN_ERR "CPA %lx: bad pte %Lx\n", addr[i], 183 printk(KERN_ERR "CPA %lx: bad pte %Lx\n", addr[i],
178 pte ? (u64)pte_val(*pte) : 0ULL); 184 pte ? (u64)pte_val(*pte) : 0ULL);
179 failed++; 185 failed++;
@@ -198,14 +204,13 @@ static int pageattr_test(void)
198 failed++; 204 failed++;
199 continue; 205 continue;
200 } 206 }
201 err = change_page_attr_set(addr[i], len[i], 207 err = change_page_attr_clear(addr[i], len[i], PAGE_TESTBIT);
202 __pgprot(_PAGE_GLOBAL));
203 if (err < 0) { 208 if (err < 0) {
204 printk(KERN_ERR "CPA reverting failed: %d\n", err); 209 printk(KERN_ERR "CPA reverting failed: %d\n", err);
205 failed++; 210 failed++;
206 } 211 }
207 pte = lookup_address(addr[i], &level); 212 pte = lookup_address(addr[i], &level);
208 if (!pte || !pte_global(*pte)) { 213 if (!pte || pte_testbit(*pte)) {
209 printk(KERN_ERR "CPA %lx: bad pte after revert %Lx\n", 214 printk(KERN_ERR "CPA %lx: bad pte after revert %Lx\n",
210 addr[i], pte ? (u64)pte_val(*pte) : 0ULL); 215 addr[i], pte ? (u64)pte_val(*pte) : 0ULL);
211 failed++; 216 failed++;
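
Reduced to plain bit operations, the test's new round trip looks like this standalone sketch (treating _PAGE_UNUSED1 as pte bit 9 is an assumption about the x86 layout):

	#include <assert.h>
	#include <stdio.h>

	#define _PAGE_UNUSED1 (1ULL << 9)	/* spare software bit in the pte */

	int main(void)
	{
		unsigned long long pte = 0x8000000000000063ULL;	/* sample flags */

		pte |= _PAGE_UNUSED1;			/* change_page_attr_set() */
		assert(pte & _PAGE_UNUSED1);		/* pte_testbit() holds */
		pte &= ~_PAGE_UNUSED1;			/* change_page_attr_clear() */
		assert(!(pte & _PAGE_UNUSED1));		/* reverted cleanly */
		printf("testbit round trip ok\n");
		return 0;
	}
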
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 60bcb5b6a37e..0389cb8f6b1a 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -34,6 +34,41 @@ struct cpa_data {
34 unsigned force_split : 1; 34 unsigned force_split : 1;
35}; 35};
36 36
37#ifdef CONFIG_PROC_FS
38static unsigned long direct_pages_count[PG_LEVEL_NUM];
39
40void update_page_count(int level, unsigned long pages)
41{
42 unsigned long flags;
43
44 /* Protect against CPA */
45 spin_lock_irqsave(&pgd_lock, flags);
46 direct_pages_count[level] += pages;
47 spin_unlock_irqrestore(&pgd_lock, flags);
48}
49
50static void split_page_count(int level)
51{
52 direct_pages_count[level]--;
53 direct_pages_count[level - 1] += PTRS_PER_PTE;
54}
55
56int arch_report_meminfo(char *page)
57{
58 int n = sprintf(page, "DirectMap4k: %8lu\n"
59 "DirectMap2M: %8lu\n",
60 direct_pages_count[PG_LEVEL_4K],
61 direct_pages_count[PG_LEVEL_2M]);
62#ifdef CONFIG_X86_64
63 n += sprintf(page + n, "DirectMap1G: %8lu\n",
64 direct_pages_count[PG_LEVEL_1G]);
65#endif
66 return n;
67}
68#else
69static inline void split_page_count(int level) { }
70#endif
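
With this plumbing in place, arch_report_meminfo() contributes lines of the following shape to /proc/meminfo (the counts are illustrative; the format is the sprintf above):

	DirectMap4k:     2560
	DirectMap2M:     2043
	DirectMap1G:        0
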
71
37#ifdef CONFIG_X86_64 72#ifdef CONFIG_X86_64
38 73
39static inline unsigned long highmap_start_pfn(void) 74static inline unsigned long highmap_start_pfn(void)
@@ -500,6 +535,16 @@ static int split_large_page(pte_t *kpte, unsigned long address)
500 for (i = 0; i < PTRS_PER_PTE; i++, pfn += pfninc) 535 for (i = 0; i < PTRS_PER_PTE; i++, pfn += pfninc)
501 set_pte(&pbase[i], pfn_pte(pfn, ref_prot)); 536 set_pte(&pbase[i], pfn_pte(pfn, ref_prot));
502 537
538 if (address >= (unsigned long)__va(0) &&
539 address < (unsigned long)__va(max_low_pfn_mapped << PAGE_SHIFT))
540 split_page_count(level);
541
542#ifdef CONFIG_X86_64
543 if (address >= (unsigned long)__va(1UL<<32) &&
544 address < (unsigned long)__va(max_pfn_mapped << PAGE_SHIFT))
545 split_page_count(level);
546#endif
547
503 /* 548 /*
504 * Install the new, split up pagetable. Important details here: 549 * Install the new, split up pagetable. Important details here:
505 * 550 *
@@ -616,12 +661,21 @@ static int cpa_process_alias(struct cpa_data *cpa)
616 if (cpa->pfn > max_pfn_mapped) 661 if (cpa->pfn > max_pfn_mapped)
617 return 0; 662 return 0;
618 663
664#ifdef CONFIG_X86_64
665 if (cpa->pfn > max_low_pfn_mapped && cpa->pfn < (1UL<<(32-PAGE_SHIFT)))
666 return 0;
667#endif
619 /* 668 /*
620 * No need to redo, when the primary call touched the direct 669 * No need to redo, when the primary call touched the direct
621 * mapping already: 670 * mapping already:
622 */ 671 */
623 if (!within(cpa->vaddr, PAGE_OFFSET, 672 if (!(within(cpa->vaddr, PAGE_OFFSET,
624 PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT))) { 673 PAGE_OFFSET + (max_low_pfn_mapped << PAGE_SHIFT))
674#ifdef CONFIG_X86_64
675 || within(cpa->vaddr, PAGE_OFFSET + (1UL<<32),
676 PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT))
677#endif
678 )) {
625 679
626 alias_cpa = *cpa; 680 alias_cpa = *cpa;
627 alias_cpa.vaddr = (unsigned long) __va(cpa->pfn << PAGE_SHIFT); 681 alias_cpa.vaddr = (unsigned long) __va(cpa->pfn << PAGE_SHIFT);
@@ -805,7 +859,7 @@ int _set_memory_wc(unsigned long addr, int numpages)
805 859
806int set_memory_wc(unsigned long addr, int numpages) 860int set_memory_wc(unsigned long addr, int numpages)
807{ 861{
808 if (!pat_wc_enabled) 862 if (!pat_enabled)
809 return set_memory_uc(addr, numpages); 863 return set_memory_uc(addr, numpages);
810 864
811 if (reserve_memtype(addr, addr + numpages * PAGE_SIZE, 865 if (reserve_memtype(addr, addr + numpages * PAGE_SIZE,
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 06b7a1c90fb8..749766c3c5cd 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -26,11 +26,11 @@
26#include <asm/io.h> 26#include <asm/io.h>
27 27
28#ifdef CONFIG_X86_PAT 28#ifdef CONFIG_X86_PAT
29int __read_mostly pat_wc_enabled = 1; 29int __read_mostly pat_enabled = 1;
30 30
31void __cpuinit pat_disable(char *reason) 31void __cpuinit pat_disable(char *reason)
32{ 32{
33 pat_wc_enabled = 0; 33 pat_enabled = 0;
34 printk(KERN_INFO "%s\n", reason); 34 printk(KERN_INFO "%s\n", reason);
35} 35}
36 36
@@ -42,6 +42,19 @@ static int __init nopat(char *str)
42early_param("nopat", nopat); 42early_param("nopat", nopat);
43#endif 43#endif
44 44
45
46static int debug_enable;
47static int __init pat_debug_setup(char *str)
48{
49 debug_enable = 1;
50 return 0;
51}
52__setup("debugpat", pat_debug_setup);
53
54#define dprintk(fmt, arg...) \
55 do { if (debug_enable) printk(KERN_INFO fmt, ##arg); } while (0)
56
57
45static u64 __read_mostly boot_pat_state; 58static u64 __read_mostly boot_pat_state;
46 59
47enum { 60enum {
@@ -53,24 +66,25 @@ enum {
 53 PAT_UC_MINUS = 7, /* UC, but can be overridden by MTRR */ 66
54}; 67};
55 68
56#define PAT(x,y) ((u64)PAT_ ## y << ((x)*8)) 69#define PAT(x, y) ((u64)PAT_ ## y << ((x)*8))
57 70
58void pat_init(void) 71void pat_init(void)
59{ 72{
60 u64 pat; 73 u64 pat;
61 74
62 if (!pat_wc_enabled) 75 if (!pat_enabled)
63 return; 76 return;
64 77
65 /* Paranoia check. */ 78 /* Paranoia check. */
66 if (!cpu_has_pat) { 79 if (!cpu_has_pat && boot_pat_state) {
67 printk(KERN_ERR "PAT enabled, but CPU feature cleared\n");
68 /* 80 /*
69 * Panic if this happens on the secondary CPU, and we 81 * If this happens we are on a secondary CPU, but
70 * switched to PAT on the boot CPU. We have no way to 82 * switched to PAT on the boot CPU. We have no way to
71 * undo PAT. 83 * undo PAT.
72 */ 84 */
73 BUG_ON(boot_pat_state); 85 printk(KERN_ERR "PAT enabled, "
86 "but not supported by secondary CPU\n");
87 BUG();
74 } 88 }
75 89
76 /* Set PWT to Write-Combining. All other bits stay the same */ 90 /* Set PWT to Write-Combining. All other bits stay the same */
@@ -86,8 +100,8 @@ void pat_init(void)
86 * 011 UC _PAGE_CACHE_UC 100 * 011 UC _PAGE_CACHE_UC
87 * PAT bit unused 101 * PAT bit unused
88 */ 102 */
89 pat = PAT(0,WB) | PAT(1,WC) | PAT(2,UC_MINUS) | PAT(3,UC) | 103 pat = PAT(0, WB) | PAT(1, WC) | PAT(2, UC_MINUS) | PAT(3, UC) |
90 PAT(4,WB) | PAT(5,WC) | PAT(6,UC_MINUS) | PAT(7,UC); 104 PAT(4, WB) | PAT(5, WC) | PAT(6, UC_MINUS) | PAT(7, UC);
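	/*
	 * Aside, not part of the patch: with this file's enum encodings
	 * (WB=6, WC=1, UC_MINUS=7, UC=0), the eight byte-wide slots give
	 *
	 *	pat = 0x0007010600070106
	 *
	 * i.e. both four-entry halves program WB, WC, UC-, UC.
	 */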
91 105
92 /* Boot CPU check */ 106 /* Boot CPU check */
93 if (!boot_pat_state) 107 if (!boot_pat_state)
@@ -103,11 +117,11 @@ void pat_init(void)
103static char *cattr_name(unsigned long flags) 117static char *cattr_name(unsigned long flags)
104{ 118{
105 switch (flags & _PAGE_CACHE_MASK) { 119 switch (flags & _PAGE_CACHE_MASK) {
106 case _PAGE_CACHE_UC: return "uncached"; 120 case _PAGE_CACHE_UC: return "uncached";
107 case _PAGE_CACHE_UC_MINUS: return "uncached-minus"; 121 case _PAGE_CACHE_UC_MINUS: return "uncached-minus";
108 case _PAGE_CACHE_WB: return "write-back"; 122 case _PAGE_CACHE_WB: return "write-back";
109 case _PAGE_CACHE_WC: return "write-combining"; 123 case _PAGE_CACHE_WC: return "write-combining";
110 default: return "broken"; 124 default: return "broken";
111 } 125 }
112} 126}
113 127
@@ -145,47 +159,50 @@ static DEFINE_SPINLOCK(memtype_lock); /* protects memtype list */
145 * The intersection is based on "Effective Memory Type" tables in IA-32 159 * The intersection is based on "Effective Memory Type" tables in IA-32
146 * SDM vol 3a 160 * SDM vol 3a
147 */ 161 */
148static int pat_x_mtrr_type(u64 start, u64 end, unsigned long prot, 162static unsigned long pat_x_mtrr_type(u64 start, u64 end, unsigned long req_type)
149 unsigned long *ret_prot)
150{ 163{
151 unsigned long pat_type;
152 u8 mtrr_type;
153
154 pat_type = prot & _PAGE_CACHE_MASK;
155 prot &= (~_PAGE_CACHE_MASK);
156
157 /*
158 * We return the PAT request directly for types where PAT takes
159 * precedence with respect to MTRR and for UC_MINUS.
160 * Consistency checks with other PAT requests is done later
161 * while going through memtype list.
162 */
163 if (pat_type == _PAGE_CACHE_WC) {
164 *ret_prot = prot | _PAGE_CACHE_WC;
165 return 0;
166 } else if (pat_type == _PAGE_CACHE_UC_MINUS) {
167 *ret_prot = prot | _PAGE_CACHE_UC_MINUS;
168 return 0;
169 } else if (pat_type == _PAGE_CACHE_UC) {
170 *ret_prot = prot | _PAGE_CACHE_UC;
171 return 0;
172 }
173
174 /* 164 /*
175 * Look for MTRR hint to get the effective type in case where PAT 165 * Look for MTRR hint to get the effective type in case where PAT
176 * request is for WB. 166 * request is for WB.
177 */ 167 */
178 mtrr_type = mtrr_type_lookup(start, end); 168 if (req_type == _PAGE_CACHE_WB) {
169 u8 mtrr_type;
170
171 mtrr_type = mtrr_type_lookup(start, end);
172 if (mtrr_type == MTRR_TYPE_UNCACHABLE)
173 return _PAGE_CACHE_UC;
174 if (mtrr_type == MTRR_TYPE_WRCOMB)
175 return _PAGE_CACHE_WC;
176 }
179 177
180 if (mtrr_type == MTRR_TYPE_UNCACHABLE) { 178 return req_type;
181 *ret_prot = prot | _PAGE_CACHE_UC; 179}
182 } else if (mtrr_type == MTRR_TYPE_WRCOMB) { 180
183 *ret_prot = prot | _PAGE_CACHE_WC; 181static int chk_conflict(struct memtype *new, struct memtype *entry,
184 } else { 182 unsigned long *type)
185 *ret_prot = prot | _PAGE_CACHE_WB; 183{
184 if (new->type != entry->type) {
185 if (type) {
186 new->type = entry->type;
187 *type = entry->type;
188 } else
189 goto conflict;
186 } 190 }
187 191
192 /* check overlaps with more than one entry in the list */
193 list_for_each_entry_continue(entry, &memtype_list, nd) {
194 if (new->end <= entry->start)
195 break;
196 else if (new->type != entry->type)
197 goto conflict;
198 }
188 return 0; 199 return 0;
200
201 conflict:
202 printk(KERN_INFO "%s:%d conflicting memory types "
203 "%Lx-%Lx %s<->%s\n", current->comm, current->pid, new->start,
204 new->end, cattr_name(new->type), cattr_name(entry->type));
205 return -EBUSY;
189} 206}
190 207
191/* 208/*
@@ -198,37 +215,36 @@ static int pat_x_mtrr_type(u64 start, u64 end, unsigned long prot,
 198 * req_type will have a special-case value '-1' when the requester wants to inherit 215
199 * the memory type from mtrr (if WB), existing PAT, defaulting to UC_MINUS. 216 * the memory type from mtrr (if WB), existing PAT, defaulting to UC_MINUS.
200 * 217 *
201 * If ret_type is NULL, function will return an error if it cannot reserve the 218 * If new_type is NULL, function will return an error if it cannot reserve the
202 * region with req_type. If ret_type is non-null, function will return 219 * region with req_type. If new_type is non-NULL, function will return
203 * available type in ret_type in case of no error. In case of any error 220 * available type in new_type in case of no error. In case of any error
204 * it will return a negative return value. 221 * it will return a negative return value.
205 */ 222 */
206int reserve_memtype(u64 start, u64 end, unsigned long req_type, 223int reserve_memtype(u64 start, u64 end, unsigned long req_type,
207 unsigned long *ret_type) 224 unsigned long *new_type)
208{ 225{
209 struct memtype *new_entry = NULL; 226 struct memtype *new, *entry;
210 struct memtype *parse;
211 unsigned long actual_type; 227 unsigned long actual_type;
228 struct list_head *where;
212 int err = 0; 229 int err = 0;
213 230
214 /* Only track when pat_wc_enabled */ 231 BUG_ON(start >= end); /* end is exclusive */
215 if (!pat_wc_enabled) { 232
233 if (!pat_enabled) {
216 /* This is identical to page table setting without PAT */ 234 /* This is identical to page table setting without PAT */
217 if (ret_type) { 235 if (new_type) {
218 if (req_type == -1) { 236 if (req_type == -1)
219 *ret_type = _PAGE_CACHE_WB; 237 *new_type = _PAGE_CACHE_WB;
220 } else { 238 else
221 *ret_type = req_type; 239 *new_type = req_type & _PAGE_CACHE_MASK;
222 }
223 } 240 }
224 return 0; 241 return 0;
225 } 242 }
226 243
227 /* Low ISA region is always mapped WB in page table. No need to track */ 244 /* Low ISA region is always mapped WB in page table. No need to track */
228 if (start >= ISA_START_ADDRESS && (end - 1) <= ISA_END_ADDRESS) { 245 if (is_ISA_range(start, end - 1)) {
229 if (ret_type) 246 if (new_type)
230 *ret_type = _PAGE_CACHE_WB; 247 *new_type = _PAGE_CACHE_WB;
231
232 return 0; 248 return 0;
233 } 249 }
234 250
@@ -241,206 +257,92 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
241 */ 257 */
242 u8 mtrr_type = mtrr_type_lookup(start, end); 258 u8 mtrr_type = mtrr_type_lookup(start, end);
243 259
244 if (mtrr_type == MTRR_TYPE_WRBACK) { 260 if (mtrr_type == MTRR_TYPE_WRBACK)
245 req_type = _PAGE_CACHE_WB;
246 actual_type = _PAGE_CACHE_WB; 261 actual_type = _PAGE_CACHE_WB;
247 } else { 262 else
248 req_type = _PAGE_CACHE_UC_MINUS;
249 actual_type = _PAGE_CACHE_UC_MINUS; 263 actual_type = _PAGE_CACHE_UC_MINUS;
250 } 264 } else
251 } else { 265 actual_type = pat_x_mtrr_type(start, end,
252 req_type &= _PAGE_CACHE_MASK; 266 req_type & _PAGE_CACHE_MASK);
253 err = pat_x_mtrr_type(start, end, req_type, &actual_type);
254 }
255
256 if (err) {
257 if (ret_type)
258 *ret_type = actual_type;
259
260 return -EINVAL;
261 }
262 267
263 new_entry = kmalloc(sizeof(struct memtype), GFP_KERNEL); 268 new = kmalloc(sizeof(struct memtype), GFP_KERNEL);
264 if (!new_entry) 269 if (!new)
265 return -ENOMEM; 270 return -ENOMEM;
266 271
267 new_entry->start = start; 272 new->start = start;
268 new_entry->end = end; 273 new->end = end;
269 new_entry->type = actual_type; 274 new->type = actual_type;
270 275
271 if (ret_type) 276 if (new_type)
272 *ret_type = actual_type; 277 *new_type = actual_type;
273 278
274 spin_lock(&memtype_lock); 279 spin_lock(&memtype_lock);
275 280
276 /* Search for existing mapping that overlaps the current range */ 281 /* Search for existing mapping that overlaps the current range */
277 list_for_each_entry(parse, &memtype_list, nd) { 282 where = NULL;
278 struct memtype *saved_ptr; 283 list_for_each_entry(entry, &memtype_list, nd) {
279 284 if (end <= entry->start) {
280 if (parse->start >= end) { 285 where = entry->nd.prev;
281 pr_debug("New Entry\n");
282 list_add(&new_entry->nd, parse->nd.prev);
283 new_entry = NULL;
284 break; 286 break;
285 } 287 } else if (start <= entry->start) { /* end > entry->start */
286 288 err = chk_conflict(new, entry, new_type);
287 if (start <= parse->start && end >= parse->start) { 289 if (!err) {
288 if (actual_type != parse->type && ret_type) { 290 dprintk("Overlap at 0x%Lx-0x%Lx\n",
289 actual_type = parse->type; 291 entry->start, entry->end);
290 *ret_type = actual_type; 292 where = entry->nd.prev;
291 new_entry->type = actual_type;
292 }
293
294 if (actual_type != parse->type) {
295 printk(
296 KERN_INFO "%s:%d conflicting memory types %Lx-%Lx %s<->%s\n",
297 current->comm, current->pid,
298 start, end,
299 cattr_name(actual_type),
300 cattr_name(parse->type));
301 err = -EBUSY;
302 break;
303 } 293 }
304
305 saved_ptr = parse;
306 /*
307 * Check to see whether the request overlaps more
308 * than one entry in the list
309 */
310 list_for_each_entry_continue(parse, &memtype_list, nd) {
311 if (end <= parse->start) {
312 break;
313 }
314
315 if (actual_type != parse->type) {
316 printk(
317 KERN_INFO "%s:%d conflicting memory types %Lx-%Lx %s<->%s\n",
318 current->comm, current->pid,
319 start, end,
320 cattr_name(actual_type),
321 cattr_name(parse->type));
322 err = -EBUSY;
323 break;
324 }
325 }
326
327 if (err) {
328 break;
329 }
330
331 pr_debug("Overlap at 0x%Lx-0x%Lx\n",
332 saved_ptr->start, saved_ptr->end);
333 /* No conflict. Go ahead and add this new entry */
334 list_add(&new_entry->nd, saved_ptr->nd.prev);
335 new_entry = NULL;
336 break; 294 break;
337 } 295 } else if (start < entry->end) { /* start > entry->start */
338 296 err = chk_conflict(new, entry, new_type);
339 if (start < parse->end) { 297 if (!err) {
340 if (actual_type != parse->type && ret_type) { 298 dprintk("Overlap at 0x%Lx-0x%Lx\n",
341 actual_type = parse->type; 299 entry->start, entry->end);
342 *ret_type = actual_type; 300 where = &entry->nd;
343 new_entry->type = actual_type;
344 }
345
346 if (actual_type != parse->type) {
347 printk(
348 KERN_INFO "%s:%d conflicting memory types %Lx-%Lx %s<->%s\n",
349 current->comm, current->pid,
350 start, end,
351 cattr_name(actual_type),
352 cattr_name(parse->type));
353 err = -EBUSY;
354 break;
355 }
356
357 saved_ptr = parse;
358 /*
359 * Check to see whether the request overlaps more
360 * than one entry in the list
361 */
362 list_for_each_entry_continue(parse, &memtype_list, nd) {
363 if (end <= parse->start) {
364 break;
365 }
366
367 if (actual_type != parse->type) {
368 printk(
369 KERN_INFO "%s:%d conflicting memory types %Lx-%Lx %s<->%s\n",
370 current->comm, current->pid,
371 start, end,
372 cattr_name(actual_type),
373 cattr_name(parse->type));
374 err = -EBUSY;
375 break;
376 }
377 }
378
379 if (err) {
380 break;
381 } 301 }
382
383 pr_debug(KERN_INFO "Overlap at 0x%Lx-0x%Lx\n",
384 saved_ptr->start, saved_ptr->end);
385 /* No conflict. Go ahead and add this new entry */
386 list_add(&new_entry->nd, &saved_ptr->nd);
387 new_entry = NULL;
388 break; 302 break;
389 } 303 }
390 } 304 }
391 305
392 if (err) { 306 if (err) {
393 printk(KERN_INFO 307 printk(KERN_INFO "reserve_memtype failed 0x%Lx-0x%Lx, "
394 "reserve_memtype failed 0x%Lx-0x%Lx, track %s, req %s\n", 308 "track %s, req %s\n",
395 start, end, cattr_name(new_entry->type), 309 start, end, cattr_name(new->type), cattr_name(req_type));
396 cattr_name(req_type)); 310 kfree(new);
397 kfree(new_entry);
398 spin_unlock(&memtype_lock); 311 spin_unlock(&memtype_lock);
399 return err; 312 return err;
400 } 313 }
401 314
402 if (new_entry) { 315 if (where)
403 /* No conflict. Not yet added to the list. Add to the tail */ 316 list_add(&new->nd, where);
404 list_add_tail(&new_entry->nd, &memtype_list); 317 else
405 pr_debug("New Entry\n"); 318 list_add_tail(&new->nd, &memtype_list);
406 }
407
408 if (ret_type) {
409 pr_debug(
410 "reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s, ret %s\n",
411 start, end, cattr_name(actual_type),
412 cattr_name(req_type), cattr_name(*ret_type));
413 } else {
414 pr_debug(
415 "reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s\n",
416 start, end, cattr_name(actual_type),
417 cattr_name(req_type));
418 }
419 319
420 spin_unlock(&memtype_lock); 320 spin_unlock(&memtype_lock);
321
322 dprintk("reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s, ret %s\n",
323 start, end, cattr_name(new->type), cattr_name(req_type),
324 new_type ? cattr_name(*new_type) : "-");
325
421 return err; 326 return err;
422} 327}
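The rewritten loop computes a single insertion point, where, and performs one list_add() after the conflict checks, replacing the three near-identical splice branches of the old code. A self-contained model of sorted insertion with conflict rejection over half-open ranges (plain pointers instead of the kernel's list_head; struct layout and names are illustrative):

#include <stdio.h>

struct range {
	unsigned long long start, end;     /* [start, end) */
	int type;
	struct range *next;
};

/* Insert keeping the list sorted by start; reject overlaps whose
 * cached type differs, mirroring chk_conflict()'s policy. */
static int insert_range(struct range **head, struct range *new)
{
	struct range **pp = head;

	for (; *pp; pp = &(*pp)->next) {
		struct range *e = *pp;

		if (new->end <= e->start)
			break;                     /* fits entirely before e */
		if (new->start < e->end && new->type != e->type)
			return -1;                 /* conflicting overlap */
		if (new->start < e->end)
			break;                     /* compatible overlap */
	}
	new->next = *pp;
	*pp = new;
	return 0;
}

int main(void)
{
	struct range *head = NULL;
	struct range a = { 0x1000, 0x2000, 1, NULL };
	struct range b = { 0x3000, 0x4000, 1, NULL };
	struct range c = { 0x1800, 0x2800, 2, NULL };  /* overlaps a, other type */

	insert_range(&head, &a);
	insert_range(&head, &b);
	printf("conflict: %d\n", insert_range(&head, &c));  /* prints -1 */
	return 0;
}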
423 328
424int free_memtype(u64 start, u64 end) 329int free_memtype(u64 start, u64 end)
425{ 330{
426 struct memtype *ml; 331 struct memtype *entry;
427 int err = -EINVAL; 332 int err = -EINVAL;
428 333
429 /* Only track when pat_wc_enabled */ 334 if (!pat_enabled)
430 if (!pat_wc_enabled) {
431 return 0; 335 return 0;
432 }
433 336
434 /* Low ISA region is always mapped WB. No need to track */ 337 /* Low ISA region is always mapped WB. No need to track */
435 if (start >= ISA_START_ADDRESS && end <= ISA_END_ADDRESS) { 338 if (is_ISA_range(start, end - 1))
436 return 0; 339 return 0;
437 }
438 340
439 spin_lock(&memtype_lock); 341 spin_lock(&memtype_lock);
440 list_for_each_entry(ml, &memtype_list, nd) { 342 list_for_each_entry(entry, &memtype_list, nd) {
441 if (ml->start == start && ml->end == end) { 343 if (entry->start == start && entry->end == end) {
442 list_del(&ml->nd); 344 list_del(&entry->nd);
443 kfree(ml); 345 kfree(entry);
444 err = 0; 346 err = 0;
445 break; 347 break;
446 } 348 }
@@ -452,7 +354,7 @@ int free_memtype(u64 start, u64 end)
452 current->comm, current->pid, start, end); 354 current->comm, current->pid, start, end);
453 } 355 }
454 356
455 pr_debug("free_memtype request 0x%Lx-0x%Lx\n", start, end); 357 dprintk("free_memtype request 0x%Lx-0x%Lx\n", start, end);
456 return err; 358 return err;
457} 359}
458 360
@@ -521,12 +423,12 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
521 * caching for the high addresses through the KEN pin, but 423 * caching for the high addresses through the KEN pin, but
522 * we maintain the tradition of paranoia in this code. 424 * we maintain the tradition of paranoia in this code.
523 */ 425 */
524 if (!pat_wc_enabled && 426 if (!pat_enabled &&
525 ! ( test_bit(X86_FEATURE_MTRR, boot_cpu_data.x86_capability) || 427 !(boot_cpu_has(X86_FEATURE_MTRR) ||
526 test_bit(X86_FEATURE_K6_MTRR, boot_cpu_data.x86_capability) || 428 boot_cpu_has(X86_FEATURE_K6_MTRR) ||
527 test_bit(X86_FEATURE_CYRIX_ARR, boot_cpu_data.x86_capability) || 429 boot_cpu_has(X86_FEATURE_CYRIX_ARR) ||
528 test_bit(X86_FEATURE_CENTAUR_MCR, boot_cpu_data.x86_capability)) && 430 boot_cpu_has(X86_FEATURE_CENTAUR_MCR)) &&
529 (pfn << PAGE_SHIFT) >= __pa(high_memory)) { 431 (pfn << PAGE_SHIFT) >= __pa(high_memory)) {
530 flags = _PAGE_CACHE_UC; 432 flags = _PAGE_CACHE_UC;
531 } 433 }
532#endif 434#endif
@@ -547,8 +449,9 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
547 if (retval < 0) 449 if (retval < 0)
548 return 0; 450 return 0;
549 451
550 if (pfn <= max_pfn_mapped && 452 if (((pfn <= max_low_pfn_mapped) ||
551 ioremap_change_attr((unsigned long)__va(offset), size, flags) < 0) { 453 (pfn >= (1UL<<(32 - PAGE_SHIFT)) && pfn <= max_pfn_mapped)) &&
454 ioremap_change_attr((unsigned long)__va(offset), size, flags) < 0) {
552 free_memtype(offset, offset + size); 455 free_memtype(offset, offset + size);
553 printk(KERN_INFO 456 printk(KERN_INFO
554 "%s:%d /dev/mem ioremap_change_attr failed %s for %Lx-%Lx\n", 457 "%s:%d /dev/mem ioremap_change_attr failed %s for %Lx-%Lx\n",
@@ -586,4 +489,3 @@ void unmap_devmem(unsigned long pfn, unsigned long size, pgprot_t vma_prot)
586 489
587 free_memtype(addr, addr + size); 490 free_memtype(addr, addr + size);
588} 491}
589
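The widened pfn test mirrors 64-bit kernels that identity-map low memory and the region above 4 GB as two separate windows, with the PCI hole between them; __va() is only meaningful inside those windows. A compact model of the predicate (the two bounds are stand-ins for the kernel's max_low_pfn_mapped and max_pfn_mapped globals):

#include <stdbool.h>
#include <stdio.h>

#define PAGE_SHIFT 12

/* Stand-ins for the kernel's max_low_pfn_mapped / max_pfn_mapped */
static unsigned long max_low_pfn_mapped = 0xc0000;   /* ~3 GB */
static unsigned long max_pfn_mapped     = 0x140000;  /* ~5 GB */

static bool pfn_is_mapped(unsigned long pfn)
{
	return pfn <= max_low_pfn_mapped ||
	       (pfn >= (1UL << (32 - PAGE_SHIFT)) && pfn <= max_pfn_mapped);
}

int main(void)
{
	printf("3G-1 page: %d\n", pfn_is_mapped(0xbffff));   /* mapped low  */
	printf("3.5G     : %d\n", pfn_is_mapped(0xe0000));   /* PCI hole    */
	printf("4.5G     : %d\n", pfn_is_mapped(0x120000));  /* mapped high */
	return 0;
}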
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 50159764f694..557b2abceef8 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -2,6 +2,7 @@
2#include <asm/pgalloc.h> 2#include <asm/pgalloc.h>
3#include <asm/pgtable.h> 3#include <asm/pgtable.h>
4#include <asm/tlb.h> 4#include <asm/tlb.h>
5#include <asm/fixmap.h>
5 6
6pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) 7pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
7{ 8{
@@ -65,12 +66,6 @@ static inline void pgd_list_del(pgd_t *pgd)
65static void pgd_ctor(void *p) 66static void pgd_ctor(void *p)
66{ 67{
67 pgd_t *pgd = p; 68 pgd_t *pgd = p;
68 unsigned long flags;
69
70 /* Clear usermode parts of PGD */
71 memset(pgd, 0, KERNEL_PGD_BOUNDARY*sizeof(pgd_t));
72
73 spin_lock_irqsave(&pgd_lock, flags);
74 69
75 /* If the pgd points to a shared pagetable level (either the 70 /* If the pgd points to a shared pagetable level (either the
76 ptes in non-PAE, or shared PMD in PAE), then just copy the 71 ptes in non-PAE, or shared PMD in PAE), then just copy the
@@ -90,8 +85,6 @@ static void pgd_ctor(void *p)
90 /* list required to sync kernel mapping updates */ 85 /* list required to sync kernel mapping updates */
91 if (!SHARED_KERNEL_PMD) 86 if (!SHARED_KERNEL_PMD)
92 pgd_list_add(pgd); 87 pgd_list_add(pgd);
93
94 spin_unlock_irqrestore(&pgd_lock, flags);
95} 88}
96 89
97static void pgd_dtor(void *pgd) 90static void pgd_dtor(void *pgd)
@@ -119,6 +112,72 @@ static void pgd_dtor(void *pgd)
119 112
120#ifdef CONFIG_X86_PAE 113#ifdef CONFIG_X86_PAE
121/* 114/*
115 * In PAE mode, we need to do a cr3 reload (=tlb flush) when
116 * updating the top-level pagetable entries to guarantee the
117 * processor notices the update. Since this is expensive, and
118 * all 4 top-level entries are used almost immediately in a
119 * new process's life, we just pre-populate them here.
120 *
121 * Also, if we're in a paravirt environment where the kernel pmd is
122 * not shared between pagetables (!SHARED_KERNEL_PMD), we allocate
123 * and initialize the kernel pmds here.
124 */
125#define PREALLOCATED_PMDS UNSHARED_PTRS_PER_PGD
126
127void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd)
128{
129 paravirt_alloc_pmd(mm, __pa(pmd) >> PAGE_SHIFT);
130
131 /* Note: almost everything apart from _PAGE_PRESENT is
132 reserved at the pmd (PDPT) level. */
133 set_pud(pudp, __pud(__pa(pmd) | _PAGE_PRESENT));
134
135 /*
136 * According to Intel App note "TLBs, Paging-Structure Caches,
137 * and Their Invalidation", April 2007, document 317080-001,
138 * section 8.1: in PAE mode we explicitly have to flush the
139 * TLB via cr3 if the top-level pgd is changed...
140 */
141 if (mm == current->active_mm)
142 write_cr3(read_cr3());
143}
144#else /* !CONFIG_X86_PAE */
145
146/* No need to prepopulate any pagetable entries in non-PAE modes. */
147#define PREALLOCATED_PMDS 0
148
149#endif /* CONFIG_X86_PAE */
150
151static void free_pmds(pmd_t *pmds[])
152{
153 int i;
154
155 for(i = 0; i < PREALLOCATED_PMDS; i++)
156 if (pmds[i])
157 free_page((unsigned long)pmds[i]);
158}
159
160static int preallocate_pmds(pmd_t *pmds[])
161{
162 int i;
163 bool failed = false;
164
165 for(i = 0; i < PREALLOCATED_PMDS; i++) {
166 pmd_t *pmd = (pmd_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
167 if (pmd == NULL)
168 failed = true;
169 pmds[i] = pmd;
170 }
171
172 if (failed) {
173 free_pmds(pmds);
174 return -ENOMEM;
175 }
176
177 return 0;
178}
179
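preallocate_pmds() commits to an all-or-nothing policy: every slot is attempted even after a failure, so the cleanup loop always sees a fully defined array, and free(NULL)-style no-ops make the unwind trivial. The same shape in plain C, with calloc standing in for get_zeroed_page() (a sketch, not the kernel code):

#include <stdbool.h>
#include <stdlib.h>

#define NSLOTS 4

static void free_all(void *slots[])
{
	for (int i = 0; i < NSLOTS; i++)
		free(slots[i]);                /* free(NULL) is a no-op */
}

static int prealloc_all(void *slots[])
{
	bool failed = false;

	for (int i = 0; i < NSLOTS; i++) {
		slots[i] = calloc(1, 4096);    /* models get_zeroed_page() */
		if (!slots[i])
			failed = true;         /* keep going, note the failure */
	}
	if (failed) {
		free_all(slots);
		return -1;                     /* -ENOMEM in the kernel */
	}
	return 0;
}

int main(void)
{
	void *slots[NSLOTS];
	return prealloc_all(slots) ? 1 : (free_all(slots), 0);
}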
180/*
122 * Mop up any pmd pages which may still be attached to the pgd. 181 * Mop up any pmd pages which may still be attached to the pgd.
123 * Normally they will be freed by munmap/exit_mmap, but any pmd we 182 * Normally they will be freed by munmap/exit_mmap, but any pmd we
124 * preallocate which never got a corresponding vma will need to be 183 * preallocate which never got a corresponding vma will need to be
@@ -128,7 +187,7 @@ static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp)
128{ 187{
129 int i; 188 int i;
130 189
131 for(i = 0; i < UNSHARED_PTRS_PER_PGD; i++) { 190 for(i = 0; i < PREALLOCATED_PMDS; i++) {
132 pgd_t pgd = pgdp[i]; 191 pgd_t pgd = pgdp[i];
133 192
134 if (pgd_val(pgd) != 0) { 193 if (pgd_val(pgd) != 0) {
@@ -142,32 +201,17 @@ static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp)
142 } 201 }
143} 202}
144 203
145/* 204static void pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmds[])
146 * In PAE mode, we need to do a cr3 reload (=tlb flush) when
147 * updating the top-level pagetable entries to guarantee the
148 * processor notices the update. Since this is expensive, and
149 * all 4 top-level entries are used almost immediately in a
150 * new process's life, we just pre-populate them here.
151 *
152 * Also, if we're in a paravirt environment where the kernel pmd is
153 * not shared between pagetables (!SHARED_KERNEL_PMD), we allocate
154 * and initialize the kernel pmds here.
155 */
156static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd)
157{ 205{
158 pud_t *pud; 206 pud_t *pud;
159 unsigned long addr; 207 unsigned long addr;
160 int i; 208 int i;
161 209
162 pud = pud_offset(pgd, 0); 210 pud = pud_offset(pgd, 0);
163 for (addr = i = 0; i < UNSHARED_PTRS_PER_PGD;
164 i++, pud++, addr += PUD_SIZE) {
165 pmd_t *pmd = pmd_alloc_one(mm, addr);
166 211
167 if (!pmd) { 212 for (addr = i = 0; i < PREALLOCATED_PMDS;
168 pgd_mop_up_pmds(mm, pgd); 213 i++, pud++, addr += PUD_SIZE) {
169 return 0; 214 pmd_t *pmd = pmds[i];
170 }
171 215
172 if (i >= KERNEL_PGD_BOUNDARY) 216 if (i >= KERNEL_PGD_BOUNDARY)
173 memcpy(pmd, (pmd_t *)pgd_page_vaddr(swapper_pg_dir[i]), 217 memcpy(pmd, (pmd_t *)pgd_page_vaddr(swapper_pg_dir[i]),
@@ -175,61 +219,54 @@ static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd)
175 219
176 pud_populate(mm, pud, pmd); 220 pud_populate(mm, pud, pmd);
177 } 221 }
178
179 return 1;
180} 222}
181 223
182void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd) 224pgd_t *pgd_alloc(struct mm_struct *mm)
183{ 225{
184 paravirt_alloc_pmd(mm, __pa(pmd) >> PAGE_SHIFT); 226 pgd_t *pgd;
227 pmd_t *pmds[PREALLOCATED_PMDS];
228 unsigned long flags;
185 229
186 /* Note: almost everything apart from _PAGE_PRESENT is 230 pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
187 reserved at the pmd (PDPT) level. */
188 set_pud(pudp, __pud(__pa(pmd) | _PAGE_PRESENT));
189 231
190 /* 232 if (pgd == NULL)
191 * According to Intel App note "TLBs, Paging-Structure Caches, 233 goto out;
192 * and Their Invalidation", April 2007, document 317080-001,
193 * section 8.1: in PAE mode we explicitly have to flush the
194 * TLB via cr3 if the top-level pgd is changed...
195 */
196 if (mm == current->active_mm)
197 write_cr3(read_cr3());
198}
199#else /* !CONFIG_X86_PAE */
200/* No need to prepopulate any pagetable entries in non-PAE modes. */
201static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd)
202{
203 return 1;
204}
205 234
206static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgd) 235 mm->pgd = pgd;
207{
208}
209#endif /* CONFIG_X86_PAE */
210 236
211pgd_t *pgd_alloc(struct mm_struct *mm) 237 if (preallocate_pmds(pmds) != 0)
212{ 238 goto out_free_pgd;
213 pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
214 239
215 /* so that alloc_pmd can use it */ 240 if (paravirt_pgd_alloc(mm) != 0)
216 mm->pgd = pgd; 241 goto out_free_pmds;
217 if (pgd)
218 pgd_ctor(pgd);
219 242
220 if (pgd && !pgd_prepopulate_pmd(mm, pgd)) { 243 /*
221 pgd_dtor(pgd); 244 * Make sure that pre-populating the pmds is atomic with
222 free_page((unsigned long)pgd); 245 * respect to anything walking the pgd_list, so that they
223 pgd = NULL; 246 * never see a partially populated pgd.
224 } 247 */
248 spin_lock_irqsave(&pgd_lock, flags);
249
250 pgd_ctor(pgd);
251 pgd_prepopulate_pmd(mm, pgd, pmds);
252
253 spin_unlock_irqrestore(&pgd_lock, flags);
225 254
226 return pgd; 255 return pgd;
256
257out_free_pmds:
258 free_pmds(pmds);
259out_free_pgd:
260 free_page((unsigned long)pgd);
261out:
262 return NULL;
227} 263}
228 264
229void pgd_free(struct mm_struct *mm, pgd_t *pgd) 265void pgd_free(struct mm_struct *mm, pgd_t *pgd)
230{ 266{
231 pgd_mop_up_pmds(mm, pgd); 267 pgd_mop_up_pmds(mm, pgd);
232 pgd_dtor(pgd); 268 pgd_dtor(pgd);
269 paravirt_pgd_free(mm, pgd);
233 free_page((unsigned long)pgd); 270 free_page((unsigned long)pgd);
234} 271}
235 272
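pgd_alloc() now unwinds with the conventional goto ladder, where each label releases exactly what was acquired before the failing step; note also that the ctor and prepopulation run under pgd_lock so pgd_list walkers never see a half-built pgd. The ladder distilled to a standalone sketch (resource names are placeholders):

#include <stdlib.h>

struct ctx { void *a, *b; };

static struct ctx *ctx_alloc(void)
{
	struct ctx *c = calloc(1, sizeof(*c));

	if (!c)
		goto out;
	c->a = malloc(64);                 /* plays the role of the pgd page */
	if (!c->a)
		goto out_free_ctx;
	c->b = malloc(64);                 /* plays the role of the pmds */
	if (!c->b)
		goto out_free_a;
	return c;

out_free_a:                                /* cf. out_free_pmds above */
	free(c->a);
out_free_ctx:                              /* cf. out_free_pgd */
	free(c);
out:
	return NULL;
}

int main(void)
{
	struct ctx *c = ctx_alloc();
	if (c) { free(c->b); free(c->a); free(c); }
	return 0;
}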
@@ -255,7 +292,7 @@ int ptep_test_and_clear_young(struct vm_area_struct *vma,
255 292
256 if (pte_young(*ptep)) 293 if (pte_young(*ptep))
257 ret = test_and_clear_bit(_PAGE_BIT_ACCESSED, 294 ret = test_and_clear_bit(_PAGE_BIT_ACCESSED,
258 &ptep->pte); 295 (unsigned long *) &ptep->pte);
259 296
260 if (ret) 297 if (ret)
261 pte_update(vma->vm_mm, addr, ptep); 298 pte_update(vma->vm_mm, addr, ptep);
@@ -274,3 +311,22 @@ int ptep_clear_flush_young(struct vm_area_struct *vma,
274 311
275 return young; 312 return young;
276} 313}
314
315int fixmaps_set;
316
317void __native_set_fixmap(enum fixed_addresses idx, pte_t pte)
318{
319 unsigned long address = __fix_to_virt(idx);
320
321 if (idx >= __end_of_fixed_addresses) {
322 BUG();
323 return;
324 }
325 set_pte_vaddr(address, pte);
326 fixmaps_set++;
327}
328
329void native_set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t flags)
330{
331 __native_set_fixmap(idx, pfn_pte(phys >> PAGE_SHIFT, flags));
332}
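Fixmap slots are compile-time indices growing downward from a fixed top address, so the index-to-address mapping behind __fix_to_virt() is pure arithmetic. A sketch under that assumption, reusing the 0xfffff000 default that __FIXADDR_TOP carries in pgtable_32.c below:

#include <stdio.h>

#define PAGE_SHIFT  12
#define FIXADDR_TOP 0xfffff000UL   /* illustrative, matches __FIXADDR_TOP's default */

/* Each fixmap index owns one page below FIXADDR_TOP. */
static unsigned long fix_to_virt(unsigned int idx)
{
	return FIXADDR_TOP - ((unsigned long)idx << PAGE_SHIFT);
}

int main(void)
{
	for (unsigned int i = 0; i < 3; i++)
		printf("fixmap %u -> %#lx\n", i, fix_to_virt(i));
	return 0;
}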
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c
index 369cf065b6a4..b4becbf8c570 100644
--- a/arch/x86/mm/pgtable_32.c
+++ b/arch/x86/mm/pgtable_32.c
@@ -71,7 +71,7 @@ void show_mem(void)
71 * Associate a virtual page frame with a given physical page frame 71 * Associate a virtual page frame with a given physical page frame
72 * and protection flags for that frame. 72 * and protection flags for that frame.
73 */ 73 */
74static void set_pte_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags) 74void set_pte_vaddr(unsigned long vaddr, pte_t pteval)
75{ 75{
76 pgd_t *pgd; 76 pgd_t *pgd;
77 pud_t *pud; 77 pud_t *pud;
@@ -94,8 +94,8 @@ static void set_pte_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags)
94 return; 94 return;
95 } 95 }
96 pte = pte_offset_kernel(pmd, vaddr); 96 pte = pte_offset_kernel(pmd, vaddr);
97 if (pgprot_val(flags)) 97 if (pte_val(pteval))
98 set_pte_present(&init_mm, vaddr, pte, pfn_pte(pfn, flags)); 98 set_pte_present(&init_mm, vaddr, pte, pteval);
99 else 99 else
100 pte_clear(&init_mm, vaddr, pte); 100 pte_clear(&init_mm, vaddr, pte);
101 101
@@ -141,22 +141,9 @@ void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags)
141 __flush_tlb_one(vaddr); 141 __flush_tlb_one(vaddr);
142} 142}
143 143
144static int fixmaps;
145unsigned long __FIXADDR_TOP = 0xfffff000; 144unsigned long __FIXADDR_TOP = 0xfffff000;
146EXPORT_SYMBOL(__FIXADDR_TOP); 145EXPORT_SYMBOL(__FIXADDR_TOP);
147 146
148void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t flags)
149{
150 unsigned long address = __fix_to_virt(idx);
151
152 if (idx >= __end_of_fixed_addresses) {
153 BUG();
154 return;
155 }
156 set_pte_pfn(address, phys >> PAGE_SHIFT, flags);
157 fixmaps++;
158}
159
160/** 147/**
161 * reserve_top_address - reserves a hole in the top of kernel address space 148 * reserve_top_address - reserves a hole in the top of kernel address space
162 * @reserve - size of hole to reserve 149 * @reserve - size of hole to reserve
@@ -164,11 +151,44 @@ void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t flags)
164 * Can be used to relocate the fixmap area and poke a hole in the top 151 * Can be used to relocate the fixmap area and poke a hole in the top
165 * of kernel address space to make room for a hypervisor. 152 * of kernel address space to make room for a hypervisor.
166 */ 153 */
167void reserve_top_address(unsigned long reserve) 154void __init reserve_top_address(unsigned long reserve)
168{ 155{
169 BUG_ON(fixmaps > 0); 156 BUG_ON(fixmaps_set > 0);
170 printk(KERN_INFO "Reserving virtual address space above 0x%08x\n", 157 printk(KERN_INFO "Reserving virtual address space above 0x%08x\n",
171 (int)-reserve); 158 (int)-reserve);
172 __FIXADDR_TOP = -reserve - PAGE_SIZE; 159 __FIXADDR_TOP = -reserve - PAGE_SIZE;
173 __VMALLOC_RESERVE += reserve; 160 __VMALLOC_RESERVE += reserve;
174} 161}
162
163/*
164 * vmalloc=size forces the vmalloc area to be exactly 'size'
165 * bytes. This can be used to increase (or decrease) the
166 * vmalloc area - the default is 128m.
167 */
168static int __init parse_vmalloc(char *arg)
169{
170 if (!arg)
171 return -EINVAL;
172
173 __VMALLOC_RESERVE = memparse(arg, &arg);
174 return 0;
175}
176early_param("vmalloc", parse_vmalloc);
177
178/*
179 * reservetop=size reserves a hole at the top of the kernel address space which
180 * a hypervisor can load into later. Needed for dynamically loaded hypervisors,
181 * so relocating the fixmap can be done before paging initialization.
182 */
183static int __init parse_reservetop(char *arg)
184{
185 unsigned long address;
186
187 if (!arg)
188 return -EINVAL;
189
190 address = memparse(arg, &arg);
191 reserve_top_address(address);
192 return 0;
193}
194early_param("reservetop", parse_reservetop);
diff --git a/arch/x86/kernel/srat_32.c b/arch/x86/mm/srat_32.c
index 70e4a374b4e8..f41d67f8f831 100644
--- a/arch/x86/kernel/srat_32.c
+++ b/arch/x86/mm/srat_32.c
@@ -31,6 +31,7 @@
31#include <asm/srat.h> 31#include <asm/srat.h>
32#include <asm/topology.h> 32#include <asm/topology.h>
33#include <asm/smp.h> 33#include <asm/smp.h>
34#include <asm/e820.h>
34 35
35/* 36/*
36 * proximity macros and definitions 37 * proximity macros and definitions
@@ -41,7 +42,7 @@
41#define BMAP_TEST(bmap, bit) ((bmap)[NODE_ARRAY_INDEX(bit)] & (1 << NODE_ARRAY_OFFSET(bit))) 42#define BMAP_TEST(bmap, bit) ((bmap)[NODE_ARRAY_INDEX(bit)] & (1 << NODE_ARRAY_OFFSET(bit)))
42/* bitmap length; _PXM is at most 255 */ 43/* bitmap length; _PXM is at most 255 */
43#define PXM_BITMAP_LEN (MAX_PXM_DOMAINS / 8) 44#define PXM_BITMAP_LEN (MAX_PXM_DOMAINS / 8)
44static u8 pxm_bitmap[PXM_BITMAP_LEN]; /* bitmap of proximity domains */ 45static u8 __initdata pxm_bitmap[PXM_BITMAP_LEN]; /* bitmap of proximity domains */
45 46
46#define MAX_CHUNKS_PER_NODE 3 47#define MAX_CHUNKS_PER_NODE 3
47#define MAXCHUNKS (MAX_CHUNKS_PER_NODE * MAX_NUMNODES) 48#define MAXCHUNKS (MAX_CHUNKS_PER_NODE * MAX_NUMNODES)
@@ -52,16 +53,37 @@ struct node_memory_chunk_s {
52 u8 nid; // which cnode contains this chunk? 53 u8 nid; // which cnode contains this chunk?
53 u8 bank; // which mem bank on this node 54 u8 bank; // which mem bank on this node
54}; 55};
55static struct node_memory_chunk_s node_memory_chunk[MAXCHUNKS]; 56static struct node_memory_chunk_s __initdata node_memory_chunk[MAXCHUNKS];
56 57
57static int num_memory_chunks; /* total number of memory chunks */ 58static int __initdata num_memory_chunks; /* total number of memory chunks */
58static u8 __initdata apicid_to_pxm[MAX_APICID]; 59static u8 __initdata apicid_to_pxm[MAX_APICID];
59 60
61int numa_off __initdata;
62int acpi_numa __initdata;
63
64static __init void bad_srat(void)
65{
66 printk(KERN_ERR "SRAT: SRAT not used.\n");
67 acpi_numa = -1;
68 num_memory_chunks = 0;
69}
70
71static __init inline int srat_disabled(void)
72{
73 return numa_off || acpi_numa < 0;
74}
75
60/* Identify CPU proximity domains */ 76/* Identify CPU proximity domains */
61static void __init parse_cpu_affinity_structure(char *p) 77void __init
78acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *cpu_affinity)
62{ 79{
63 struct acpi_srat_cpu_affinity *cpu_affinity = 80 if (srat_disabled())
64 (struct acpi_srat_cpu_affinity *) p; 81 return;
82 if (cpu_affinity->header.length !=
83 sizeof(struct acpi_srat_cpu_affinity)) {
84 bad_srat();
85 return;
86 }
65 87
66 if ((cpu_affinity->flags & ACPI_SRAT_CPU_ENABLED) == 0) 88 if ((cpu_affinity->flags & ACPI_SRAT_CPU_ENABLED) == 0)
67 return; /* empty entry */ 89 return; /* empty entry */
@@ -71,7 +93,7 @@ static void __init parse_cpu_affinity_structure(char *p)
71 93
72 apicid_to_pxm[cpu_affinity->apic_id] = cpu_affinity->proximity_domain_lo; 94 apicid_to_pxm[cpu_affinity->apic_id] = cpu_affinity->proximity_domain_lo;
73 95
74 printk("CPU 0x%02X in proximity domain 0x%02X\n", 96 printk(KERN_DEBUG "CPU %02x in proximity domain %02x\n",
75 cpu_affinity->apic_id, cpu_affinity->proximity_domain_lo); 97 cpu_affinity->apic_id, cpu_affinity->proximity_domain_lo);
76} 98}
77 99
@@ -79,14 +101,21 @@ static void __init parse_cpu_affinity_structure(char *p)
79 * Identify memory proximity domains and hot-remove capabilities. 101 * Identify memory proximity domains and hot-remove capabilities.
80 * Fill node memory chunk list structure. 102 * Fill node memory chunk list structure.
81 */ 103 */
82static void __init parse_memory_affinity_structure (char *sratp) 104void __init
105acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *memory_affinity)
83{ 106{
84 unsigned long long paddr, size; 107 unsigned long long paddr, size;
85 unsigned long start_pfn, end_pfn; 108 unsigned long start_pfn, end_pfn;
86 u8 pxm; 109 u8 pxm;
87 struct node_memory_chunk_s *p, *q, *pend; 110 struct node_memory_chunk_s *p, *q, *pend;
88 struct acpi_srat_mem_affinity *memory_affinity = 111
89 (struct acpi_srat_mem_affinity *) sratp; 112 if (srat_disabled())
113 return;
114 if (memory_affinity->header.length !=
115 sizeof(struct acpi_srat_mem_affinity)) {
116 bad_srat();
117 return;
118 }
90 119
91 if ((memory_affinity->flags & ACPI_SRAT_MEM_ENABLED) == 0) 120 if ((memory_affinity->flags & ACPI_SRAT_MEM_ENABLED) == 0)
92 return; /* empty entry */ 121 return; /* empty entry */
@@ -105,7 +134,8 @@ static void __init parse_memory_affinity_structure (char *sratp)
105 134
106 135
107 if (num_memory_chunks >= MAXCHUNKS) { 136 if (num_memory_chunks >= MAXCHUNKS) {
108 printk("Too many mem chunks in SRAT. Ignoring %lld MBytes at %llx\n", 137 printk(KERN_WARNING "Too many mem chunks in SRAT."
138 " Ignoring %lld MBytes at %llx\n",
109 size/(1024*1024), paddr); 139 size/(1024*1024), paddr);
110 return; 140 return;
111 } 141 }
@@ -126,7 +156,8 @@ static void __init parse_memory_affinity_structure (char *sratp)
126 156
127 num_memory_chunks++; 157 num_memory_chunks++;
128 158
129 printk("Memory range 0x%lX to 0x%lX (type 0x%X) in proximity domain 0x%02X %s\n", 159 printk(KERN_DEBUG "Memory range %08lx to %08lx (type %x)"
160 " in proximity domain %02x %s\n",
130 start_pfn, end_pfn, 161 start_pfn, end_pfn,
131 memory_affinity->memory_type, 162 memory_affinity->memory_type,
132 pxm, 163 pxm,
@@ -134,6 +165,14 @@ static void __init parse_memory_affinity_structure (char *sratp)
134 "enabled and removable" : "enabled" ) ); 165 "enabled and removable" : "enabled" ) );
135} 166}
136 167
168/* Callback for SLIT parsing */
169void __init acpi_numa_slit_init(struct acpi_table_slit *slit)
170{
171}
172
173void acpi_numa_arch_fixup(void)
174{
175}
137/* 176/*
138 * The SRAT table always lists ascending addresses, so can always 177 * The SRAT table always lists ascending addresses, so can always
139 * assume that the first "start" address that you see is the real 178 * assume that the first "start" address that you see is the real
@@ -149,7 +188,7 @@ static __init void node_read_chunk(int nid, struct node_memory_chunk_s *memory_c
149 * *possible* memory hotplug areas the same as normal RAM. 188 * *possible* memory hotplug areas the same as normal RAM.
150 */ 189 */
151 if (memory_chunk->start_pfn >= max_pfn) { 190 if (memory_chunk->start_pfn >= max_pfn) {
152 printk (KERN_INFO "Ignoring SRAT pfns: 0x%08lx -> %08lx\n", 191 printk(KERN_INFO "Ignoring SRAT pfns: %08lx - %08lx\n",
153 memory_chunk->start_pfn, memory_chunk->end_pfn); 192 memory_chunk->start_pfn, memory_chunk->end_pfn);
154 return; 193 return;
155 } 194 }
@@ -166,42 +205,17 @@ static __init void node_read_chunk(int nid, struct node_memory_chunk_s *memory_c
166 node_end_pfn[nid] = memory_chunk->end_pfn; 205 node_end_pfn[nid] = memory_chunk->end_pfn;
167} 206}
168 207
169/* Parse the ACPI Static Resource Affinity Table */ 208int __init get_memcfg_from_srat(void)
170static int __init acpi20_parse_srat(struct acpi_table_srat *sratp)
171{ 209{
172 u8 *start, *end, *p;
173 int i, j, nid; 210 int i, j, nid;
174 211
175 start = (u8 *)(&(sratp->reserved) + 1); /* skip header */
176 p = start;
177 end = (u8 *)sratp + sratp->header.length;
178 212
179 memset(pxm_bitmap, 0, sizeof(pxm_bitmap)); /* init proximity domain bitmap */ 213 if (srat_disabled())
180 memset(node_memory_chunk, 0, sizeof(node_memory_chunk)); 214 goto out_fail;
181
182 num_memory_chunks = 0;
183 while (p < end) {
184 switch (*p) {
185 case ACPI_SRAT_TYPE_CPU_AFFINITY:
186 parse_cpu_affinity_structure(p);
187 break;
188 case ACPI_SRAT_TYPE_MEMORY_AFFINITY:
189 parse_memory_affinity_structure(p);
190 break;
191 default:
192 printk("ACPI 2.0 SRAT: unknown entry skipped: type=0x%02X, len=%d\n", p[0], p[1]);
193 break;
194 }
195 p += p[1];
196 if (p[1] == 0) {
197 printk("acpi20_parse_srat: Entry length value is zero;"
198 " can't parse any further!\n");
199 break;
200 }
201 }
202 215
203 if (num_memory_chunks == 0) { 216 if (num_memory_chunks == 0) {
204 printk("could not finy any ACPI SRAT memory areas.\n"); 217 printk(KERN_WARNING
218 "could not finy any ACPI SRAT memory areas.\n");
205 goto out_fail; 219 goto out_fail;
206 } 220 }
207 221
@@ -228,131 +242,39 @@ static int __init acpi20_parse_srat(struct acpi_table_srat *sratp)
228 for (i = 0; i < num_memory_chunks; i++) 242 for (i = 0; i < num_memory_chunks; i++)
229 node_memory_chunk[i].nid = pxm_to_node(node_memory_chunk[i].pxm); 243 node_memory_chunk[i].nid = pxm_to_node(node_memory_chunk[i].pxm);
230 244
231 printk("pxm bitmap: "); 245 printk(KERN_DEBUG "pxm bitmap: ");
232 for (i = 0; i < sizeof(pxm_bitmap); i++) { 246 for (i = 0; i < sizeof(pxm_bitmap); i++) {
233 printk("%02X ", pxm_bitmap[i]); 247 printk(KERN_CONT "%02x ", pxm_bitmap[i]);
234 } 248 }
235 printk("\n"); 249 printk(KERN_CONT "\n");
236 printk("Number of logical nodes in system = %d\n", num_online_nodes()); 250 printk(KERN_DEBUG "Number of logical nodes in system = %d\n",
237 printk("Number of memory chunks in system = %d\n", num_memory_chunks); 251 num_online_nodes());
252 printk(KERN_DEBUG "Number of memory chunks in system = %d\n",
253 num_memory_chunks);
238 254
239 for (i = 0; i < MAX_APICID; i++) 255 for (i = 0; i < MAX_APICID; i++)
240 apicid_2_node[i] = pxm_to_node(apicid_to_pxm[i]); 256 apicid_2_node[i] = pxm_to_node(apicid_to_pxm[i]);
241 257
242 for (j = 0; j < num_memory_chunks; j++){ 258 for (j = 0; j < num_memory_chunks; j++){
243 struct node_memory_chunk_s * chunk = &node_memory_chunk[j]; 259 struct node_memory_chunk_s * chunk = &node_memory_chunk[j];
244 printk("chunk %d nid %d start_pfn %08lx end_pfn %08lx\n", 260 printk(KERN_DEBUG
261 "chunk %d nid %d start_pfn %08lx end_pfn %08lx\n",
245 j, chunk->nid, chunk->start_pfn, chunk->end_pfn); 262 j, chunk->nid, chunk->start_pfn, chunk->end_pfn);
246 node_read_chunk(chunk->nid, chunk); 263 node_read_chunk(chunk->nid, chunk);
247 add_active_range(chunk->nid, chunk->start_pfn, chunk->end_pfn); 264 e820_register_active_regions(chunk->nid, chunk->start_pfn,
265 min(chunk->end_pfn, max_pfn));
248 } 266 }
249 267
250 for_each_online_node(nid) { 268 for_each_online_node(nid) {
251 unsigned long start = node_start_pfn[nid]; 269 unsigned long start = node_start_pfn[nid];
252 unsigned long end = node_end_pfn[nid]; 270 unsigned long end = min(node_end_pfn[nid], max_pfn);
253 271
254 memory_present(nid, start, end); 272 memory_present(nid, start, end);
255 node_remap_size[nid] = node_memmap_size_bytes(nid, start, end); 273 node_remap_size[nid] = node_memmap_size_bytes(nid, start, end);
256 } 274 }
257 return 1; 275 return 1;
258out_fail: 276out_fail:
259 return 0; 277 printk(KERN_ERR "failed to get NUMA memory information from SRAT"
260} 278 " table\n");
261
262struct acpi_static_rsdt {
263 struct acpi_table_rsdt table;
264 u32 padding[7]; /* Allow for 7 more table entries */
265};
266
267int __init get_memcfg_from_srat(void)
268{
269 struct acpi_table_header *header = NULL;
270 struct acpi_table_rsdp *rsdp = NULL;
271 struct acpi_table_rsdt *rsdt = NULL;
272 acpi_native_uint rsdp_address = 0;
273 struct acpi_static_rsdt saved_rsdt;
274 int tables = 0;
275 int i = 0;
276
277 rsdp_address = acpi_os_get_root_pointer();
278 if (!rsdp_address) {
279 printk("%s: System description tables not found\n",
280 __func__);
281 goto out_err;
282 }
283
284 printk("%s: assigning address to rsdp\n", __func__);
285 rsdp = (struct acpi_table_rsdp *)(u32)rsdp_address;
286 if (!rsdp) {
287 printk("%s: Didn't find ACPI root!\n", __func__);
288 goto out_err;
289 }
290
291 printk(KERN_INFO "%.8s v%d [%.6s]\n", rsdp->signature, rsdp->revision,
292 rsdp->oem_id);
293
294 if (strncmp(rsdp->signature, ACPI_SIG_RSDP,strlen(ACPI_SIG_RSDP))) {
295 printk(KERN_WARNING "%s: RSDP table signature incorrect\n", __func__);
296 goto out_err;
297 }
298
299 rsdt = (struct acpi_table_rsdt *)
300 early_ioremap(rsdp->rsdt_physical_address, sizeof(struct acpi_table_rsdt));
301
302 if (!rsdt) {
303 printk(KERN_WARNING
304 "%s: ACPI: Invalid root system description tables (RSDT)\n",
305 __func__);
306 goto out_err;
307 }
308
309 header = &rsdt->header;
310
311 if (strncmp(header->signature, ACPI_SIG_RSDT, strlen(ACPI_SIG_RSDT))) {
312 printk(KERN_WARNING "ACPI: RSDT signature incorrect\n");
313 goto out_err;
314 }
315
316 /*
317 * The number of tables is computed by taking the
 318 * size of all entries (total size of the RSDT minus
 319 * header size) divided by the size of each entry
320 * (4-byte table pointers).
321 */
322 tables = (header->length - sizeof(struct acpi_table_header)) / 4;
323
324 if (!tables)
325 goto out_err;
326
327 memcpy(&saved_rsdt, rsdt, sizeof(saved_rsdt));
328
329 if (saved_rsdt.table.header.length > sizeof(saved_rsdt)) {
330 printk(KERN_WARNING "ACPI: Too big length in RSDT: %d\n",
331 saved_rsdt.table.header.length);
332 goto out_err;
333 }
334
335 printk("Begin SRAT table scan....\n");
336
337 for (i = 0; i < tables; i++) {
338 /* Map in header, then map in full table length. */
339 header = (struct acpi_table_header *)
340 early_ioremap(saved_rsdt.table.table_offset_entry[i], sizeof(struct acpi_table_header));
341 if (!header)
342 break;
343 header = (struct acpi_table_header *)
344 early_ioremap(saved_rsdt.table.table_offset_entry[i], header->length);
345 if (!header)
346 break;
347
348 if (strncmp((char *) &header->signature, ACPI_SIG_SRAT, 4))
349 continue;
350
351 /* we've found the srat table. don't need to look at any more tables */
352 return acpi20_parse_srat((struct acpi_table_srat *)header);
353 }
354out_err:
355 remove_all_active_ranges();
356 printk("failed to get NUMA memory information from SRAT table\n");
357 return 0; 279 return 0;
358} 280}
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index 99649dccad28..1b4763e26ea9 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -100,7 +100,19 @@ static __init inline int srat_disabled(void)
100/* Callback for SLIT parsing */ 100/* Callback for SLIT parsing */
101void __init acpi_numa_slit_init(struct acpi_table_slit *slit) 101void __init acpi_numa_slit_init(struct acpi_table_slit *slit)
102{ 102{
103 acpi_slit = slit; 103 unsigned length;
104 unsigned long phys;
105
106 length = slit->header.length;
107 phys = find_e820_area(0, max_pfn_mapped<<PAGE_SHIFT, length,
108 PAGE_SIZE);
109
110 if (phys == -1L)
111 panic(" Can not save slit!\n");
112
113 acpi_slit = __va(phys);
114 memcpy(acpi_slit, slit, length);
115 reserve_early(phys, phys + length, "ACPI SLIT");
104} 116}
105 117
106/* Callback for Proximity Domain -> LAPIC mapping */ 118/* Callback for Proximity Domain -> LAPIC mapping */
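The SLIT is copied out because the firmware table may sit in memory that is reclaimed after early boot; the pattern is: find a free physical range with find_e820_area(), copy the table into it, then reserve_early() the range so later allocators keep off. A toy model of that claim-by-copy sequence over a flat buffer (only the two kernel helper names are real; everything else is illustrative):

#include <stdio.h>
#include <string.h>

#define POOL_SZ 4096
static unsigned char pool[POOL_SZ];    /* stands in for early physical memory */
static size_t next_free;               /* trivial 'free area' cursor */

/* find_e820_area() analogue: hand out the next unreserved range */
static long find_area(size_t len)
{
	return next_free + len <= POOL_SZ ? (long)next_free : -1;
}

/* reserve_early() analogue: everything below the cursor is taken */
static void reserve_early_ish(long off, size_t len)
{
	next_free = off + len;
}

int main(void)
{
	const char slit[] = "SLIT payload";
	long off = find_area(sizeof(slit));

	if (off < 0)
		return 1;                      /* the kernel panics instead */
	memcpy(pool + off, slit, sizeof(slit));
	reserve_early_ish(off, sizeof(slit));
	printf("SLIT saved at offset %ld\n", off);
	return 0;
}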
@@ -299,7 +311,7 @@ static int __init nodes_cover_memory(const struct bootnode *nodes)
299 pxmram = 0; 311 pxmram = 0;
300 } 312 }
301 313
302 e820ram = end_pfn - absent_pages_in_range(0, end_pfn); 314 e820ram = max_pfn - absent_pages_in_range(0, max_pfn);
303 /* We seem to lose 3 pages somewhere. Allow a bit of slack. */ 315 /* We seem to lose 3 pages somewhere. Allow a bit of slack. */
304 if ((long)(e820ram - pxmram) >= 1*1024*1024) { 316 if ((long)(e820ram - pxmram) >= 1*1024*1024) {
305 printk(KERN_ERR 317 printk(KERN_ERR
@@ -376,7 +388,7 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
376 if (node == NUMA_NO_NODE) 388 if (node == NUMA_NO_NODE)
377 continue; 389 continue;
378 if (!node_isset(node, node_possible_map)) 390 if (!node_isset(node, node_possible_map))
379 numa_set_node(i, NUMA_NO_NODE); 391 numa_clear_node(i);
380 } 392 }
381 numa_init_array(); 393 numa_init_array();
382 return 0; 394 return 0;
@@ -495,6 +507,7 @@ int __node_distance(int a, int b)
495 507
496EXPORT_SYMBOL(__node_distance); 508EXPORT_SYMBOL(__node_distance);
497 509
510#if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) || defined(CONFIG_ACPI_HOTPLUG_MEMORY)
498int memory_add_physaddr_to_nid(u64 start) 511int memory_add_physaddr_to_nid(u64 start)
499{ 512{
500 int i, ret = 0; 513 int i, ret = 0;
@@ -506,4 +519,4 @@ int memory_add_physaddr_to_nid(u64 start)
506 return ret; 519 return ret;
507} 520}
508EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); 521EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
509 522#endif
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index cc48d3fde545..2b6ad5b9f9d5 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -269,12 +269,13 @@ static void nmi_cpu_shutdown(void *dummy)
269 269
270static void nmi_shutdown(void) 270static void nmi_shutdown(void)
271{ 271{
272 struct op_msrs *msrs = &__get_cpu_var(cpu_msrs); 272 struct op_msrs *msrs = &get_cpu_var(cpu_msrs);
273 nmi_enabled = 0; 273 nmi_enabled = 0;
274 on_each_cpu(nmi_cpu_shutdown, NULL, 0, 1); 274 on_each_cpu(nmi_cpu_shutdown, NULL, 0, 1);
275 unregister_die_notifier(&profile_exceptions_nb); 275 unregister_die_notifier(&profile_exceptions_nb);
276 model->shutdown(msrs); 276 model->shutdown(msrs);
277 free_msrs(); 277 free_msrs();
278 put_cpu_var(cpu_msrs);
278} 279}
279 280
280static void nmi_cpu_start(void *dummy) 281static void nmi_cpu_start(void *dummy)
diff --git a/arch/x86/pci/Makefile b/arch/x86/pci/Makefile
index c5c8e485fc44..e515e8db842a 100644
--- a/arch/x86/pci/Makefile
+++ b/arch/x86/pci/Makefile
@@ -1,5 +1,17 @@
1ifeq ($(CONFIG_X86_32),y) 1obj-y := i386.o init.o
2include ${srctree}/arch/x86/pci/Makefile_32 2
3else 3obj-$(CONFIG_PCI_BIOS) += pcbios.o
4include ${srctree}/arch/x86/pci/Makefile_64 4obj-$(CONFIG_PCI_MMCONFIG) += mmconfig_$(BITS).o direct.o mmconfig-shared.o
5endif 5obj-$(CONFIG_PCI_DIRECT) += direct.o
6obj-$(CONFIG_PCI_OLPC) += olpc.o
7
8pci-y := fixup.o
9pci-$(CONFIG_ACPI) += acpi.o
10pci-y += legacy.o irq.o
11
12pci-$(CONFIG_X86_VISWS) += visws.o
13
14pci-$(CONFIG_X86_NUMAQ) += numa.o
15
16obj-y += $(pci-y) common.o early.o
17obj-y += amd_bus.o
diff --git a/arch/x86/pci/Makefile_32 b/arch/x86/pci/Makefile_32
deleted file mode 100644
index 89ec35d00efd..000000000000
--- a/arch/x86/pci/Makefile_32
+++ /dev/null
@@ -1,24 +0,0 @@
1obj-y := i386.o init.o
2
3obj-$(CONFIG_PCI_BIOS) += pcbios.o
4obj-$(CONFIG_PCI_MMCONFIG) += mmconfig_32.o direct.o mmconfig-shared.o
5obj-$(CONFIG_PCI_DIRECT) += direct.o
6obj-$(CONFIG_PCI_OLPC) += olpc.o
7
8pci-y := fixup.o
9
10# Do not change the ordering here. There is a nasty init function
11# ordering dependency which breaks when you move acpi.o below
12# legacy/irq.o
13pci-$(CONFIG_ACPI) += acpi.o
14pci-y += legacy.o irq.o
15
16# Careful: VISWS and NUMAQ overrule the pci-y above. The colons are
17# therefor correct. This needs a proper fix by distangling the code.
18pci-$(CONFIG_X86_VISWS) := visws.o fixup.o
19pci-$(CONFIG_X86_NUMAQ) := numa.o irq.o
20
21# Necessary for NUMAQ as well
22pci-$(CONFIG_NUMA) += mp_bus_to_node.o
23
24obj-y += $(pci-y) common.o early.o
diff --git a/arch/x86/pci/Makefile_64 b/arch/x86/pci/Makefile_64
deleted file mode 100644
index 8fbd19832cf6..000000000000
--- a/arch/x86/pci/Makefile_64
+++ /dev/null
@@ -1,17 +0,0 @@
1#
2# Makefile for X86_64 specific PCI routines
3#
4# Reuse the i386 PCI subsystem
5#
6EXTRA_CFLAGS += -Iarch/x86/pci
7
8obj-y := i386.o
9obj-$(CONFIG_PCI_DIRECT)+= direct.o
10obj-y += fixup.o init.o
11obj-$(CONFIG_ACPI) += acpi.o
12obj-y += legacy.o irq.o common.o early.o
13# mmconfig has a 64-bit special case
14obj-$(CONFIG_PCI_MMCONFIG) += mmconfig_64.o direct.o mmconfig-shared.o
15
16obj-y += k8-bus_64.o
17
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index d95de2f199cd..19af06927fbc 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -171,8 +171,11 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int do
171 if (node != -1) 171 if (node != -1)
172 set_mp_bus_to_node(busnum, node); 172 set_mp_bus_to_node(busnum, node);
173 else 173 else
174 node = get_mp_bus_to_node(busnum);
175#endif 174#endif
175 node = get_mp_bus_to_node(busnum);
176
177 if (node != -1 && !node_online(node))
178 node = -1;
176 179
177 /* Allocate per-root-bus (not per bus) arch-specific data. 180 /* Allocate per-root-bus (not per bus) arch-specific data.
178 * TODO: leak; this memory is never freed. 181 * TODO: leak; this memory is never freed.
@@ -204,22 +207,23 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int do
204 if (!bus) 207 if (!bus)
205 kfree(sd); 208 kfree(sd);
206 209
210 if (bus && node != -1) {
207#ifdef CONFIG_ACPI_NUMA 211#ifdef CONFIG_ACPI_NUMA
208 if (bus) { 212 if (pxm >= 0)
209 if (pxm >= 0) {
210 printk(KERN_DEBUG "bus %02x -> pxm %d -> node %d\n", 213 printk(KERN_DEBUG "bus %02x -> pxm %d -> node %d\n",
211 busnum, pxm, pxm_to_node(pxm)); 214 busnum, pxm, node);
212 } 215#else
213 } 216 printk(KERN_DEBUG "bus %02x -> node %d\n",
217 busnum, node);
214#endif 218#endif
219 }
215 220
216 if (bus && (pci_probe & PCI_USE__CRS)) 221 if (bus && (pci_probe & PCI_USE__CRS))
217 get_current_resources(device, busnum, domain, bus); 222 get_current_resources(device, busnum, domain, bus);
218 return bus; 223 return bus;
219} 224}
220 225
221extern int pci_routeirq; 226int __init pci_acpi_init(void)
222static int __init pci_acpi_init(void)
223{ 227{
224 struct pci_dev *dev = NULL; 228 struct pci_dev *dev = NULL;
225 229
@@ -253,4 +257,3 @@ static int __init pci_acpi_init(void)
253 257
254 return 0; 258 return 0;
255} 259}
256subsys_initcall(pci_acpi_init);
diff --git a/arch/x86/pci/k8-bus_64.c b/arch/x86/pci/amd_bus.c
index 5c2799c20e47..a18141ae3f02 100644
--- a/arch/x86/pci/k8-bus_64.c
+++ b/arch/x86/pci/amd_bus.c
@@ -1,40 +1,25 @@
1#include <linux/init.h> 1#include <linux/init.h>
2#include <linux/pci.h> 2#include <linux/pci.h>
3#include <linux/topology.h>
4#include "pci.h"
5
6#ifdef CONFIG_X86_64
3#include <asm/pci-direct.h> 7#include <asm/pci-direct.h>
4#include <asm/mpspec.h> 8#include <asm/mpspec.h>
5#include <linux/cpumask.h> 9#include <linux/cpumask.h>
6#include <linux/topology.h> 10#endif
7 11
8/* 12/*
9 * This discovers the pcibus <-> node mapping on AMD K8. 13 * This discovers the pcibus <-> node mapping on AMD K8.
10 * It also gets peer root bus resources for io/mmio. 14 * It also gets peer root bus resources for io/mmio.
11 */ 15 */
12 16
13
14/*
15 * sub bus (transparent) will use entries from 3 to store extras from the root,
16 * so make sure there are enough slots there; increase PCI_BUS_NUM_RESOURCES?
17 */
18#define RES_NUM 16
19struct pci_root_info {
20 char name[12];
21 unsigned int res_num;
22 struct resource res[RES_NUM];
23 int bus_min;
24 int bus_max;
25 int node;
26 int link;
27};
28
29/* 4 at this time, it may grow to 32 */
30#define PCI_ROOT_NR 4
31static int pci_root_num;
32static struct pci_root_info pci_root_info[PCI_ROOT_NR];
33
34#ifdef CONFIG_NUMA 17#ifdef CONFIG_NUMA
35 18
36#define BUS_NR 256 19#define BUS_NR 256
37 20
21#ifdef CONFIG_X86_64
22
38static int mp_bus_to_node[BUS_NR]; 23static int mp_bus_to_node[BUS_NR];
39 24
40void set_mp_bus_to_node(int busnum, int node) 25void set_mp_bus_to_node(int busnum, int node)
@@ -61,7 +46,52 @@ int get_mp_bus_to_node(int busnum)
61 46
62 return node; 47 return node;
63} 48}
64#endif 49
50#else /* CONFIG_X86_32 */
51
52static unsigned char mp_bus_to_node[BUS_NR];
53
54void set_mp_bus_to_node(int busnum, int node)
55{
56 if (busnum >= 0 && busnum < BUS_NR)
57 mp_bus_to_node[busnum] = (unsigned char) node;
58}
59
60int get_mp_bus_to_node(int busnum)
61{
62 int node;
63
64 if (busnum < 0 || busnum > (BUS_NR - 1))
65 return 0;
66 node = mp_bus_to_node[busnum];
67 return node;
68}
69
70#endif /* CONFIG_X86_32 */
71
72#endif /* CONFIG_NUMA */
73
74#ifdef CONFIG_X86_64
75
76/*
77 * sub bus (transparent) will use entries from 3 to store extras from the root,
78 * so make sure there are enough slots there; increase PCI_BUS_NUM_RESOURCES?
79 */
80#define RES_NUM 16
81struct pci_root_info {
82 char name[12];
83 unsigned int res_num;
84 struct resource res[RES_NUM];
85 int bus_min;
86 int bus_max;
87 int node;
88 int link;
89};
90
91/* 4 at this time, it may grow to 32 */
92#define PCI_ROOT_NR 4
93static int pci_root_num;
94static struct pci_root_info pci_root_info[PCI_ROOT_NR];
65 95
66void set_pci_bus_resources_arch_default(struct pci_bus *b) 96void set_pci_bus_resources_arch_default(struct pci_bus *b)
67{ 97{
@@ -384,7 +414,7 @@ static int __init early_fill_mp_bus_info(void)
384 /* need to take out [0, TOM) for RAM*/ 414 /* need to take out [0, TOM) for RAM*/
385 address = MSR_K8_TOP_MEM1; 415 address = MSR_K8_TOP_MEM1;
386 rdmsrl(address, val); 416 rdmsrl(address, val);
387 end = (val & 0xffffff8000000ULL); 417 end = (val & 0xffffff800000ULL);
388 printk(KERN_INFO "TOM: %016lx aka %ldM\n", end, end>>20); 418 printk(KERN_INFO "TOM: %016lx aka %ldM\n", end, end>>20);
389 if (end < (1ULL<<32)) 419 if (end < (1ULL<<32))
390 update_range(range, 0, end - 1); 420 update_range(range, 0, end - 1);
@@ -478,7 +508,7 @@ static int __init early_fill_mp_bus_info(void)
478 /* TOP_MEM2 */ 508 /* TOP_MEM2 */
479 address = MSR_K8_TOP_MEM2; 509 address = MSR_K8_TOP_MEM2;
480 rdmsrl(address, val); 510 rdmsrl(address, val);
481 end = (val & 0xffffff8000000ULL); 511 end = (val & 0xffffff800000ULL);
482 printk(KERN_INFO "TOM2: %016lx aka %ldM\n", end, end>>20); 512 printk(KERN_INFO "TOM2: %016lx aka %ldM\n", end, end>>20);
483 update_range(range, 1ULL<<32, end - 1); 513 update_range(range, 1ULL<<32, end - 1);
484 } 514 }
@@ -526,3 +556,31 @@ static int __init early_fill_mp_bus_info(void)
526} 556}
527 557
528postcore_initcall(early_fill_mp_bus_info); 558postcore_initcall(early_fill_mp_bus_info);
559
560#endif
561
562/* common 32/64 bit code */
563
564#define ENABLE_CF8_EXT_CFG (1ULL << 46)
565
566static void enable_pci_io_ecs_per_cpu(void *unused)
567{
568 u64 reg;
569 rdmsrl(MSR_AMD64_NB_CFG, reg);
570 if (!(reg & ENABLE_CF8_EXT_CFG)) {
571 reg |= ENABLE_CF8_EXT_CFG;
572 wrmsrl(MSR_AMD64_NB_CFG, reg);
573 }
574}
575
576static int __init enable_pci_io_ecs(void)
577{
578 /* assume all cpus from fam10h have IO ECS */
579 if (boot_cpu_data.x86 < 0x10)
580 return 0;
581 on_each_cpu(enable_pci_io_ecs_per_cpu, NULL, 1, 1);
582 pci_probe |= PCI_HAS_IO_ECS;
583 return 0;
584}
585
586postcore_initcall(enable_pci_io_ecs);
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index 6e64aaf00d1d..20b9f59f95df 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -384,7 +384,7 @@ struct pci_bus * __devinit pcibios_scan_root(int busnum)
384 384
385extern u8 pci_cache_line_size; 385extern u8 pci_cache_line_size;
386 386
387static int __init pcibios_init(void) 387int __init pcibios_init(void)
388{ 388{
389 struct cpuinfo_x86 *c = &boot_cpu_data; 389 struct cpuinfo_x86 *c = &boot_cpu_data;
390 390
@@ -411,8 +411,6 @@ static int __init pcibios_init(void)
411 return 0; 411 return 0;
412} 412}
413 413
414subsys_initcall(pcibios_init);
415
416char * __devinit pcibios_setup(char *str) 414char * __devinit pcibios_setup(char *str)
417{ 415{
418 if (!strcmp(str, "off")) { 416 if (!strcmp(str, "off")) {
diff --git a/arch/x86/pci/direct.c b/arch/x86/pci/direct.c
index 21d1e0e0d535..9915293500fb 100644
--- a/arch/x86/pci/direct.c
+++ b/arch/x86/pci/direct.c
@@ -8,18 +8,21 @@
8#include "pci.h" 8#include "pci.h"
9 9
10/* 10/*
11 * Functions for accessing PCI configuration space with type 1 accesses 11 * Functions for accessing PCI base (first 256 bytes) and extended
12 * (4096 bytes per PCI function) configuration space with type 1
13 * accesses.
12 */ 14 */
13 15
14#define PCI_CONF1_ADDRESS(bus, devfn, reg) \ 16#define PCI_CONF1_ADDRESS(bus, devfn, reg) \
15 (0x80000000 | (bus << 16) | (devfn << 8) | (reg & ~3)) 17 (0x80000000 | ((reg & 0xF00) << 16) | (bus << 16) \
18 | (devfn << 8) | (reg & 0xFC))
16 19
17static int pci_conf1_read(unsigned int seg, unsigned int bus, 20static int pci_conf1_read(unsigned int seg, unsigned int bus,
18 unsigned int devfn, int reg, int len, u32 *value) 21 unsigned int devfn, int reg, int len, u32 *value)
19{ 22{
20 unsigned long flags; 23 unsigned long flags;
21 24
22 if ((bus > 255) || (devfn > 255) || (reg > 255)) { 25 if ((bus > 255) || (devfn > 255) || (reg > 4095)) {
23 *value = -1; 26 *value = -1;
24 return -EINVAL; 27 return -EINVAL;
25 } 28 }
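Type 1 access encodes bus, devfn, and register into the 0xCF8 address port; the extended variant relocates register bits 11:8 to address bits 27:24, which is exactly why the bounds check grows from 255 to 4095. A worked example of the encoding (standalone arithmetic mirroring the macro above):

#include <stdio.h>

static unsigned int conf1_addr(unsigned bus, unsigned devfn, unsigned reg)
{
	return 0x80000000u | ((reg & 0xF00) << 16) | (bus << 16)
	     | (devfn << 8) | (reg & 0xFC);
}

int main(void)
{
	/* bus 0, device 3 function 0, extended register 0x164 */
	unsigned devfn = (3 << 3) | 0;
	printf("%#010x\n", conf1_addr(0, devfn, 0x164));
	/* -> 0x81001864: bit 24 carries reg bit 8, low bits are devfn<<8 | (reg & 0xFC) */
	return 0;
}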
@@ -50,7 +53,7 @@ static int pci_conf1_write(unsigned int seg, unsigned int bus,
50{ 53{
51 unsigned long flags; 54 unsigned long flags;
52 55
53 if ((bus > 255) || (devfn > 255) || (reg > 255)) 56 if ((bus > 255) || (devfn > 255) || (reg > 4095))
54 return -EINVAL; 57 return -EINVAL;
55 58
56 spin_lock_irqsave(&pci_config_lock, flags); 59 spin_lock_irqsave(&pci_config_lock, flags);
@@ -260,10 +263,18 @@ void __init pci_direct_init(int type)
260 return; 263 return;
261 printk(KERN_INFO "PCI: Using configuration type %d for base access\n", 264 printk(KERN_INFO "PCI: Using configuration type %d for base access\n",
262 type); 265 type);
263 if (type == 1) 266 if (type == 1) {
264 raw_pci_ops = &pci_direct_conf1; 267 raw_pci_ops = &pci_direct_conf1;
265 else 268 if (raw_pci_ext_ops)
266 raw_pci_ops = &pci_direct_conf2; 269 return;
270 if (!(pci_probe & PCI_HAS_IO_ECS))
271 return;
272 printk(KERN_INFO "PCI: Using configuration type 1 "
273 "for extended access\n");
274 raw_pci_ext_ops = &pci_direct_conf1;
275 return;
276 }
277 raw_pci_ops = &pci_direct_conf2;
267} 278}
268 279
269int __init pci_direct_probe(void) 280int __init pci_direct_probe(void)
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c
index 10fb308fded8..5281e343dd9f 100644
--- a/arch/x86/pci/i386.c
+++ b/arch/x86/pci/i386.c
@@ -299,9 +299,9 @@ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
299 return -EINVAL; 299 return -EINVAL;
300 300
301 prot = pgprot_val(vma->vm_page_prot); 301 prot = pgprot_val(vma->vm_page_prot);
302 if (pat_wc_enabled && write_combine) 302 if (pat_enabled && write_combine)
303 prot |= _PAGE_CACHE_WC; 303 prot |= _PAGE_CACHE_WC;
304 else if (pat_wc_enabled || boot_cpu_data.x86 > 3) 304 else if (pat_enabled || boot_cpu_data.x86 > 3)
305 /* 305 /*
306 * ioremap() and ioremap_nocache() defaults to UC MINUS for now. 306 * ioremap() and ioremap_nocache() defaults to UC MINUS for now.
307 * To avoid attribute conflicts, request UC MINUS here 307 * To avoid attribute conflicts, request UC MINUS here
@@ -334,7 +334,9 @@ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
334 flags = new_flags; 334 flags = new_flags;
335 } 335 }
336 336
337 if (vma->vm_pgoff <= max_pfn_mapped && 337 if (((vma->vm_pgoff <= max_low_pfn_mapped) ||
338 (vma->vm_pgoff >= (1UL<<(32 - PAGE_SHIFT)) &&
339 vma->vm_pgoff <= max_pfn_mapped)) &&
338 ioremap_change_attr((unsigned long)__va(addr), len, flags)) { 340 ioremap_change_attr((unsigned long)__va(addr), len, flags)) {
339 free_memtype(addr, addr + len); 341 free_memtype(addr, addr + len);
340 return -EINVAL; 342 return -EINVAL;
diff --git a/arch/x86/pci/init.c b/arch/x86/pci/init.c
index b821f4462d99..d6c950f81858 100644
--- a/arch/x86/pci/init.c
+++ b/arch/x86/pci/init.c
@@ -4,7 +4,7 @@
4 4
5/* arch_initcall has too random ordering, so call the initializers 5/* arch_initcall has too random ordering, so call the initializers
6 in the right sequence from here. */ 6 in the right sequence from here. */
7static __init int pci_access_init(void) 7static __init int pci_arch_init(void)
8{ 8{
9#ifdef CONFIG_PCI_DIRECT 9#ifdef CONFIG_PCI_DIRECT
10 int type = 0; 10 int type = 0;
@@ -40,4 +40,4 @@ static __init int pci_access_init(void)
40 40
41 return 0; 41 return 0;
42} 42}
43arch_initcall(pci_access_init); 43arch_initcall(pci_arch_init);
diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c
index ca8df9c260bc..dc568c6b83f8 100644
--- a/arch/x86/pci/irq.c
+++ b/arch/x86/pci/irq.c
@@ -11,8 +11,8 @@
11#include <linux/slab.h> 11#include <linux/slab.h>
12#include <linux/interrupt.h> 12#include <linux/interrupt.h>
13#include <linux/dmi.h> 13#include <linux/dmi.h>
14#include <asm/io.h> 14#include <linux/io.h>
15#include <asm/smp.h> 15#include <linux/smp.h>
16#include <asm/io_apic.h> 16#include <asm/io_apic.h>
17#include <linux/irq.h> 17#include <linux/irq.h>
18#include <linux/acpi.h> 18#include <linux/acpi.h>
@@ -61,7 +61,7 @@ void (*pcibios_disable_irq)(struct pci_dev *dev) = NULL;
61 * and perform checksum verification. 61 * and perform checksum verification.
62 */ 62 */
63 63
64static inline struct irq_routing_table * pirq_check_routing_table(u8 *addr) 64static inline struct irq_routing_table *pirq_check_routing_table(u8 *addr)
65{ 65{
66 struct irq_routing_table *rt; 66 struct irq_routing_table *rt;
67 int i; 67 int i;
@@ -74,7 +74,7 @@ static inline struct irq_routing_table * pirq_check_routing_table(u8 *addr)
74 rt->size < sizeof(struct irq_routing_table)) 74 rt->size < sizeof(struct irq_routing_table))
75 return NULL; 75 return NULL;
76 sum = 0; 76 sum = 0;
77 for (i=0; i < rt->size; i++) 77 for (i = 0; i < rt->size; i++)
78 sum += addr[i]; 78 sum += addr[i];
79 if (!sum) { 79 if (!sum) {
80 DBG(KERN_DEBUG "PCI: Interrupt Routing Table found at 0x%p\n", rt); 80 DBG(KERN_DEBUG "PCI: Interrupt Routing Table found at 0x%p\n", rt);
@@ -100,7 +100,7 @@ static struct irq_routing_table * __init pirq_find_routing_table(void)
100 return rt; 100 return rt;
101 printk(KERN_WARNING "PCI: PIRQ table NOT found at pirqaddr\n"); 101 printk(KERN_WARNING "PCI: PIRQ table NOT found at pirqaddr\n");
102 } 102 }
103 for(addr = (u8 *) __va(0xf0000); addr < (u8 *) __va(0x100000); addr += 16) { 103 for (addr = (u8 *) __va(0xf0000); addr < (u8 *) __va(0x100000); addr += 16) {
104 rt = pirq_check_routing_table(addr); 104 rt = pirq_check_routing_table(addr);
105 if (rt) 105 if (rt)
106 return rt; 106 return rt;
@@ -122,20 +122,20 @@ static void __init pirq_peer_trick(void)
122 struct irq_info *e; 122 struct irq_info *e;
123 123
124 memset(busmap, 0, sizeof(busmap)); 124 memset(busmap, 0, sizeof(busmap));
125 for(i=0; i < (rt->size - sizeof(struct irq_routing_table)) / sizeof(struct irq_info); i++) { 125 for (i = 0; i < (rt->size - sizeof(struct irq_routing_table)) / sizeof(struct irq_info); i++) {
126 e = &rt->slots[i]; 126 e = &rt->slots[i];
127#ifdef DEBUG 127#ifdef DEBUG
128 { 128 {
129 int j; 129 int j;
130 DBG(KERN_DEBUG "%02x:%02x slot=%02x", e->bus, e->devfn/8, e->slot); 130 DBG(KERN_DEBUG "%02x:%02x slot=%02x", e->bus, e->devfn/8, e->slot);
131 for(j=0; j<4; j++) 131 for (j = 0; j < 4; j++)
132 DBG(" %d:%02x/%04x", j, e->irq[j].link, e->irq[j].bitmap); 132 DBG(" %d:%02x/%04x", j, e->irq[j].link, e->irq[j].bitmap);
133 DBG("\n"); 133 DBG("\n");
134 } 134 }
135#endif 135#endif
136 busmap[e->bus] = 1; 136 busmap[e->bus] = 1;
137 } 137 }
138 for(i = 1; i < 256; i++) { 138 for (i = 1; i < 256; i++) {
139 int node; 139 int node;
140 if (!busmap[i] || pci_find_bus(0, i)) 140 if (!busmap[i] || pci_find_bus(0, i))
141 continue; 141 continue;
@@ -285,7 +285,7 @@ static int pirq_ite_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
285 static const unsigned char pirqmap[4] = { 1, 0, 2, 3 }; 285 static const unsigned char pirqmap[4] = { 1, 0, 2, 3 };
286 286
287 WARN_ON_ONCE(pirq > 4); 287 WARN_ON_ONCE(pirq > 4);
288 return read_config_nybble(router,0x43, pirqmap[pirq-1]); 288 return read_config_nybble(router, 0x43, pirqmap[pirq-1]);
289} 289}
290 290
291static int pirq_ite_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) 291static int pirq_ite_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
@@ -314,7 +314,7 @@ static int pirq_opti_set(struct pci_dev *router, struct pci_dev *dev, int pirq,
314 314
315/* 315/*
316 * Cyrix: nibble offset 0x5C 316 * Cyrix: nibble offset 0x5C
317 * 0x5C bits 7:4 is INTB bits 3:0 is INTA 317 * 0x5C bits 7:4 is INTB bits 3:0 is INTA
318 * 0x5D bits 7:4 is INTD bits 3:0 is INTC 318 * 0x5D bits 7:4 is INTD bits 3:0 is INTC
319 */ 319 */
320static int pirq_cyrix_get(struct pci_dev *router, struct pci_dev *dev, int pirq) 320static int pirq_cyrix_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
@@ -350,7 +350,7 @@ static int pirq_cyrix_set(struct pci_dev *router, struct pci_dev *dev, int pirq,
350 * Apparently there are systems implementing PCI routing table using 350 * Apparently there are systems implementing PCI routing table using
351 * link values 0x01-0x04 and others using 0x41-0x44 for PCI INTA..D. 351 * link values 0x01-0x04 and others using 0x41-0x44 for PCI INTA..D.
352 * We try our best to handle both link mappings. 352 * We try our best to handle both link mappings.
353 * 353 *
354 * Currently (2003-05-21) it appears most SiS chipsets follow the 354 * Currently (2003-05-21) it appears most SiS chipsets follow the
355 * definition of routing registers from the SiS-5595 southbridge. 355 * definition of routing registers from the SiS-5595 southbridge.
356 * According to the SiS 5595 datasheets the revision id's of the 356 * According to the SiS 5595 datasheets the revision id's of the
@@ -370,7 +370,7 @@ static int pirq_cyrix_set(struct pci_dev *router, struct pci_dev *dev, int pirq,
370 * 370 *
371 * 0x62: USBIRQ: 371 * 0x62: USBIRQ:
372 * bit 6 OHCI function disabled (0), enabled (1) 372 * bit 6 OHCI function disabled (0), enabled (1)
373 * 373 *
374 * 0x6a: ACPI/SCI IRQ: bits 4-6 reserved 374 * 0x6a: ACPI/SCI IRQ: bits 4-6 reserved
375 * 375 *
376 * 0x7e: Data Acq. Module IRQ - bits 4-6 reserved 376 * 0x7e: Data Acq. Module IRQ - bits 4-6 reserved
@@ -487,9 +487,7 @@ static int pirq_amd756_get(struct pci_dev *router, struct pci_dev *dev, int pirq
487 u8 irq; 487 u8 irq;
488 irq = 0; 488 irq = 0;
489 if (pirq <= 4) 489 if (pirq <= 4)
490 {
491 irq = read_config_nybble(router, 0x56, pirq - 1); 490 irq = read_config_nybble(router, 0x56, pirq - 1);
492 }
493 printk(KERN_INFO "AMD756: dev %04x:%04x, router pirq : %d get irq : %2d\n", 491 printk(KERN_INFO "AMD756: dev %04x:%04x, router pirq : %d get irq : %2d\n",
494 dev->vendor, dev->device, pirq, irq); 492 dev->vendor, dev->device, pirq, irq);
495 return irq; 493 return irq;
@@ -497,12 +495,10 @@ static int pirq_amd756_get(struct pci_dev *router, struct pci_dev *dev, int pirq
497 495
498static int pirq_amd756_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) 496static int pirq_amd756_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
499{ 497{
500 printk(KERN_INFO "AMD756: dev %04x:%04x, router pirq : %d SET irq : %2d\n", 498 printk(KERN_INFO "AMD756: dev %04x:%04x, router pirq : %d SET irq : %2d\n",
501 dev->vendor, dev->device, pirq, irq); 499 dev->vendor, dev->device, pirq, irq);
502 if (pirq <= 4) 500 if (pirq <= 4)
503 {
504 write_config_nybble(router, 0x56, pirq - 1, irq); 501 write_config_nybble(router, 0x56, pirq - 1, irq);
505 }
506 return 1; 502 return 1;
507} 503}
508 504
@@ -549,50 +545,49 @@ static __init int intel_router_probe(struct irq_router *r, struct pci_dev *route
549 if (pci_dev_present(pirq_440gx)) 545 if (pci_dev_present(pirq_440gx))
550 return 0; 546 return 0;
551 547
552 switch(device) 548 switch (device) {
553 { 549 case PCI_DEVICE_ID_INTEL_82371FB_0:
554 case PCI_DEVICE_ID_INTEL_82371FB_0: 550 case PCI_DEVICE_ID_INTEL_82371SB_0:
555 case PCI_DEVICE_ID_INTEL_82371SB_0: 551 case PCI_DEVICE_ID_INTEL_82371AB_0:
556 case PCI_DEVICE_ID_INTEL_82371AB_0: 552 case PCI_DEVICE_ID_INTEL_82371MX:
557 case PCI_DEVICE_ID_INTEL_82371MX: 553 case PCI_DEVICE_ID_INTEL_82443MX_0:
558 case PCI_DEVICE_ID_INTEL_82443MX_0: 554 case PCI_DEVICE_ID_INTEL_82801AA_0:
559 case PCI_DEVICE_ID_INTEL_82801AA_0: 555 case PCI_DEVICE_ID_INTEL_82801AB_0:
560 case PCI_DEVICE_ID_INTEL_82801AB_0: 556 case PCI_DEVICE_ID_INTEL_82801BA_0:
561 case PCI_DEVICE_ID_INTEL_82801BA_0: 557 case PCI_DEVICE_ID_INTEL_82801BA_10:
562 case PCI_DEVICE_ID_INTEL_82801BA_10: 558 case PCI_DEVICE_ID_INTEL_82801CA_0:
563 case PCI_DEVICE_ID_INTEL_82801CA_0: 559 case PCI_DEVICE_ID_INTEL_82801CA_12:
564 case PCI_DEVICE_ID_INTEL_82801CA_12: 560 case PCI_DEVICE_ID_INTEL_82801DB_0:
565 case PCI_DEVICE_ID_INTEL_82801DB_0: 561 case PCI_DEVICE_ID_INTEL_82801E_0:
566 case PCI_DEVICE_ID_INTEL_82801E_0: 562 case PCI_DEVICE_ID_INTEL_82801EB_0:
567 case PCI_DEVICE_ID_INTEL_82801EB_0: 563 case PCI_DEVICE_ID_INTEL_ESB_1:
568 case PCI_DEVICE_ID_INTEL_ESB_1: 564 case PCI_DEVICE_ID_INTEL_ICH6_0:
569 case PCI_DEVICE_ID_INTEL_ICH6_0: 565 case PCI_DEVICE_ID_INTEL_ICH6_1:
570 case PCI_DEVICE_ID_INTEL_ICH6_1: 566 case PCI_DEVICE_ID_INTEL_ICH7_0:
571 case PCI_DEVICE_ID_INTEL_ICH7_0: 567 case PCI_DEVICE_ID_INTEL_ICH7_1:
572 case PCI_DEVICE_ID_INTEL_ICH7_1: 568 case PCI_DEVICE_ID_INTEL_ICH7_30:
573 case PCI_DEVICE_ID_INTEL_ICH7_30: 569 case PCI_DEVICE_ID_INTEL_ICH7_31:
574 case PCI_DEVICE_ID_INTEL_ICH7_31: 570 case PCI_DEVICE_ID_INTEL_ESB2_0:
575 case PCI_DEVICE_ID_INTEL_ESB2_0: 571 case PCI_DEVICE_ID_INTEL_ICH8_0:
576 case PCI_DEVICE_ID_INTEL_ICH8_0: 572 case PCI_DEVICE_ID_INTEL_ICH8_1:
577 case PCI_DEVICE_ID_INTEL_ICH8_1: 573 case PCI_DEVICE_ID_INTEL_ICH8_2:
578 case PCI_DEVICE_ID_INTEL_ICH8_2: 574 case PCI_DEVICE_ID_INTEL_ICH8_3:
579 case PCI_DEVICE_ID_INTEL_ICH8_3: 575 case PCI_DEVICE_ID_INTEL_ICH8_4:
580 case PCI_DEVICE_ID_INTEL_ICH8_4: 576 case PCI_DEVICE_ID_INTEL_ICH9_0:
581 case PCI_DEVICE_ID_INTEL_ICH9_0: 577 case PCI_DEVICE_ID_INTEL_ICH9_1:
582 case PCI_DEVICE_ID_INTEL_ICH9_1: 578 case PCI_DEVICE_ID_INTEL_ICH9_2:
583 case PCI_DEVICE_ID_INTEL_ICH9_2: 579 case PCI_DEVICE_ID_INTEL_ICH9_3:
584 case PCI_DEVICE_ID_INTEL_ICH9_3: 580 case PCI_DEVICE_ID_INTEL_ICH9_4:
585 case PCI_DEVICE_ID_INTEL_ICH9_4: 581 case PCI_DEVICE_ID_INTEL_ICH9_5:
586 case PCI_DEVICE_ID_INTEL_ICH9_5: 582 case PCI_DEVICE_ID_INTEL_TOLAPAI_0:
587 case PCI_DEVICE_ID_INTEL_TOLAPAI_0: 583 case PCI_DEVICE_ID_INTEL_ICH10_0:
588 case PCI_DEVICE_ID_INTEL_ICH10_0: 584 case PCI_DEVICE_ID_INTEL_ICH10_1:
589 case PCI_DEVICE_ID_INTEL_ICH10_1: 585 case PCI_DEVICE_ID_INTEL_ICH10_2:
590 case PCI_DEVICE_ID_INTEL_ICH10_2: 586 case PCI_DEVICE_ID_INTEL_ICH10_3:
591 case PCI_DEVICE_ID_INTEL_ICH10_3: 587 r->name = "PIIX/ICH";
592 r->name = "PIIX/ICH"; 588 r->get = pirq_piix_get;
593 r->get = pirq_piix_get; 589 r->set = pirq_piix_set;
594 r->set = pirq_piix_set; 590 return 1;
595 return 1;
596 } 591 }
597 return 0; 592 return 0;
598} 593}
@@ -606,7 +601,7 @@ static __init int via_router_probe(struct irq_router *r,
606 * workarounds for some buggy BIOSes 601 * workarounds for some buggy BIOSes
607 */ 602 */
608 if (device == PCI_DEVICE_ID_VIA_82C586_0) { 603 if (device == PCI_DEVICE_ID_VIA_82C586_0) {
609 switch(router->device) { 604 switch (router->device) {
610 case PCI_DEVICE_ID_VIA_82C686: 605 case PCI_DEVICE_ID_VIA_82C686:
611 /* 606 /*
612 * Asus k7m bios wrongly reports 82C686A 607 * Asus k7m bios wrongly reports 82C686A
@@ -631,7 +626,7 @@ static __init int via_router_probe(struct irq_router *r,
631 } 626 }
632 } 627 }
633 628
634 switch(device) { 629 switch (device) {
635 case PCI_DEVICE_ID_VIA_82C586_0: 630 case PCI_DEVICE_ID_VIA_82C586_0:
636 r->name = "VIA"; 631 r->name = "VIA";
637 r->get = pirq_via586_get; 632 r->get = pirq_via586_get;
@@ -654,13 +649,12 @@ static __init int via_router_probe(struct irq_router *r,
654 649
655static __init int vlsi_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) 650static __init int vlsi_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
656{ 651{
657 switch(device) 652 switch (device) {
658 { 653 case PCI_DEVICE_ID_VLSI_82C534:
659 case PCI_DEVICE_ID_VLSI_82C534: 654 r->name = "VLSI 82C534";
660 r->name = "VLSI 82C534"; 655 r->get = pirq_vlsi_get;
661 r->get = pirq_vlsi_get; 656 r->set = pirq_vlsi_set;
662 r->set = pirq_vlsi_set; 657 return 1;
663 return 1;
664 } 658 }
665 return 0; 659 return 0;
666} 660}
@@ -668,14 +662,13 @@ static __init int vlsi_router_probe(struct irq_router *r, struct pci_dev *router
668 662
669static __init int serverworks_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) 663static __init int serverworks_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
670{ 664{
671 switch(device) 665 switch (device) {
672 { 666 case PCI_DEVICE_ID_SERVERWORKS_OSB4:
673 case PCI_DEVICE_ID_SERVERWORKS_OSB4: 667 case PCI_DEVICE_ID_SERVERWORKS_CSB5:
674 case PCI_DEVICE_ID_SERVERWORKS_CSB5: 668 r->name = "ServerWorks";
675 r->name = "ServerWorks"; 669 r->get = pirq_serverworks_get;
676 r->get = pirq_serverworks_get; 670 r->set = pirq_serverworks_set;
677 r->set = pirq_serverworks_set; 671 return 1;
678 return 1;
679 } 672 }
680 return 0; 673 return 0;
681} 674}
@@ -684,7 +677,7 @@ static __init int sis_router_probe(struct irq_router *r, struct pci_dev *router,
684{ 677{
685 if (device != PCI_DEVICE_ID_SI_503) 678 if (device != PCI_DEVICE_ID_SI_503)
686 return 0; 679 return 0;
687 680
688 r->name = "SIS"; 681 r->name = "SIS";
689 r->get = pirq_sis_get; 682 r->get = pirq_sis_get;
690 r->set = pirq_sis_set; 683 r->set = pirq_sis_set;
@@ -693,47 +686,43 @@ static __init int sis_router_probe(struct irq_router *r, struct pci_dev *router,
693 686
694static __init int cyrix_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) 687static __init int cyrix_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
695{ 688{
696 switch(device) 689 switch (device) {
697 { 690 case PCI_DEVICE_ID_CYRIX_5520:
698 case PCI_DEVICE_ID_CYRIX_5520: 691 r->name = "NatSemi";
699 r->name = "NatSemi"; 692 r->get = pirq_cyrix_get;
700 r->get = pirq_cyrix_get; 693 r->set = pirq_cyrix_set;
701 r->set = pirq_cyrix_set; 694 return 1;
702 return 1;
703 } 695 }
704 return 0; 696 return 0;
705} 697}
706 698
707static __init int opti_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) 699static __init int opti_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
708{ 700{
709 switch(device) 701 switch (device) {
710 { 702 case PCI_DEVICE_ID_OPTI_82C700:
711 case PCI_DEVICE_ID_OPTI_82C700: 703 r->name = "OPTI";
712 r->name = "OPTI"; 704 r->get = pirq_opti_get;
713 r->get = pirq_opti_get; 705 r->set = pirq_opti_set;
714 r->set = pirq_opti_set; 706 return 1;
715 return 1;
716 } 707 }
717 return 0; 708 return 0;
718} 709}
719 710
720static __init int ite_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) 711static __init int ite_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
721{ 712{
722 switch(device) 713 switch (device) {
723 { 714 case PCI_DEVICE_ID_ITE_IT8330G_0:
724 case PCI_DEVICE_ID_ITE_IT8330G_0: 715 r->name = "ITE";
725 r->name = "ITE"; 716 r->get = pirq_ite_get;
726 r->get = pirq_ite_get; 717 r->set = pirq_ite_set;
727 r->set = pirq_ite_set; 718 return 1;
728 return 1;
729 } 719 }
730 return 0; 720 return 0;
731} 721}
732 722
733static __init int ali_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) 723static __init int ali_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
734{ 724{
735 switch(device) 725 switch (device) {
736 {
737 case PCI_DEVICE_ID_AL_M1533: 726 case PCI_DEVICE_ID_AL_M1533:
738 case PCI_DEVICE_ID_AL_M1563: 727 case PCI_DEVICE_ID_AL_M1563:
739 printk(KERN_DEBUG "PCI: Using ALI IRQ Router\n"); 728 printk(KERN_DEBUG "PCI: Using ALI IRQ Router\n");
@@ -747,25 +736,24 @@ static __init int ali_router_probe(struct irq_router *r, struct pci_dev *router,
747 736
748static __init int amd_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) 737static __init int amd_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
749{ 738{
750 switch(device) 739 switch (device) {
751 { 740 case PCI_DEVICE_ID_AMD_VIPER_740B:
752 case PCI_DEVICE_ID_AMD_VIPER_740B: 741 r->name = "AMD756";
753 r->name = "AMD756"; 742 break;
754 break; 743 case PCI_DEVICE_ID_AMD_VIPER_7413:
755 case PCI_DEVICE_ID_AMD_VIPER_7413: 744 r->name = "AMD766";
756 r->name = "AMD766"; 745 break;
757 break; 746 case PCI_DEVICE_ID_AMD_VIPER_7443:
758 case PCI_DEVICE_ID_AMD_VIPER_7443: 747 r->name = "AMD768";
759 r->name = "AMD768"; 748 break;
760 break; 749 default:
761 default: 750 return 0;
762 return 0;
763 } 751 }
764 r->get = pirq_amd756_get; 752 r->get = pirq_amd756_get;
765 r->set = pirq_amd756_set; 753 r->set = pirq_amd756_set;
766 return 1; 754 return 1;
767} 755}
768 756
769static __init int pico_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) 757static __init int pico_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
770{ 758{
771 switch (device) { 759 switch (device) {
@@ -807,7 +795,7 @@ static struct pci_dev *pirq_router_dev;
807 * FIXME: should we have an option to say "generic for 795 * FIXME: should we have an option to say "generic for
808 * chipset" ? 796 * chipset" ?
809 */ 797 */
810 798
811static void __init pirq_find_router(struct irq_router *r) 799static void __init pirq_find_router(struct irq_router *r)
812{ 800{
813 struct irq_routing_table *rt = pirq_table; 801 struct irq_routing_table *rt = pirq_table;
@@ -826,7 +814,7 @@ static void __init pirq_find_router(struct irq_router *r)
826 r->name = "default"; 814 r->name = "default";
827 r->get = NULL; 815 r->get = NULL;
828 r->set = NULL; 816 r->set = NULL;
829 817
830 DBG(KERN_DEBUG "PCI: Attempting to find IRQ router for %04x:%04x\n", 818 DBG(KERN_DEBUG "PCI: Attempting to find IRQ router for %04x:%04x\n",
831 rt->rtr_vendor, rt->rtr_device); 819 rt->rtr_vendor, rt->rtr_device);
832 820
@@ -837,7 +825,7 @@ static void __init pirq_find_router(struct irq_router *r)
837 return; 825 return;
838 } 826 }
839 827
840 for( h = pirq_routers; h->vendor; h++) { 828 for (h = pirq_routers; h->vendor; h++) {
841 /* First look for a router match */ 829 /* First look for a router match */
842 if (rt->rtr_vendor == h->vendor && h->probe(r, pirq_router_dev, rt->rtr_device)) 830 if (rt->rtr_vendor == h->vendor && h->probe(r, pirq_router_dev, rt->rtr_device))
843 break; 831 break;
@@ -889,7 +877,7 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign)
889 877
890 if (!pirq_table) 878 if (!pirq_table)
891 return 0; 879 return 0;
892 880
893 DBG(KERN_DEBUG "IRQ for %s[%c]", pci_name(dev), 'A' + pin); 881 DBG(KERN_DEBUG "IRQ for %s[%c]", pci_name(dev), 'A' + pin);
894 info = pirq_get_info(dev); 882 info = pirq_get_info(dev);
895 if (!info) { 883 if (!info) {
@@ -928,8 +916,10 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign)
928 */ 916 */
929 newirq = dev->irq; 917 newirq = dev->irq;
930 if (newirq && !((1 << newirq) & mask)) { 918 if (newirq && !((1 << newirq) & mask)) {
931 if ( pci_probe & PCI_USE_PIRQ_MASK) newirq = 0; 919 if (pci_probe & PCI_USE_PIRQ_MASK)
932 else printk("\n" KERN_WARNING 920 newirq = 0;
921 else
922 printk("\n" KERN_WARNING
933 "PCI: IRQ %i for device %s doesn't match PIRQ mask " 923 "PCI: IRQ %i for device %s doesn't match PIRQ mask "
934 "- try pci=usepirqmask\n" KERN_DEBUG, newirq, 924 "- try pci=usepirqmask\n" KERN_DEBUG, newirq,
935 pci_name(dev)); 925 pci_name(dev));
@@ -949,8 +939,8 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign)
949 irq = pirq & 0xf; 939 irq = pirq & 0xf;
950 DBG(" -> hardcoded IRQ %d\n", irq); 940 DBG(" -> hardcoded IRQ %d\n", irq);
951 msg = "Hardcoded"; 941 msg = "Hardcoded";
952 } else if ( r->get && (irq = r->get(pirq_router_dev, dev, pirq)) && \ 942 } else if (r->get && (irq = r->get(pirq_router_dev, dev, pirq)) && \
953 ((!(pci_probe & PCI_USE_PIRQ_MASK)) || ((1 << irq) & mask)) ) { 943 ((!(pci_probe & PCI_USE_PIRQ_MASK)) || ((1 << irq) & mask))) {
954 DBG(" -> got IRQ %d\n", irq); 944 DBG(" -> got IRQ %d\n", irq);
955 msg = "Found"; 945 msg = "Found";
956 eisa_set_level_irq(irq); 946 eisa_set_level_irq(irq);
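The (1 << irq) & mask tests in this function implement the PIRQ-mask policy: in a $PIR entry, bit n of a link's bitmap is set iff IRQ n may be assigned to that link. The same check, pulled out into a hypothetical helper:

	/* hypothetical: true if 'irq' is permitted by the link's bitmap */
	static int pirq_irq_allowed(unsigned int irq, u16 bitmap)
	{
		return irq < 16 && (bitmap & (1 << irq));
	}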
@@ -985,15 +975,15 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign)
985 continue; 975 continue;
986 if (info->irq[pin].link == pirq) { 976 if (info->irq[pin].link == pirq) {
987 /* We refuse to override the dev->irq information. Give a warning! */ 977 /* We refuse to override the dev->irq information. Give a warning! */
988 if ( dev2->irq && dev2->irq != irq && \ 978 if (dev2->irq && dev2->irq != irq && \
989 (!(pci_probe & PCI_USE_PIRQ_MASK) || \ 979 (!(pci_probe & PCI_USE_PIRQ_MASK) || \
990 ((1 << dev2->irq) & mask)) ) { 980 ((1 << dev2->irq) & mask))) {
991#ifndef CONFIG_PCI_MSI 981#ifndef CONFIG_PCI_MSI
992 printk(KERN_INFO "IRQ routing conflict for %s, have irq %d, want irq %d\n", 982 printk(KERN_INFO "IRQ routing conflict for %s, have irq %d, want irq %d\n",
993 pci_name(dev2), dev2->irq, irq); 983 pci_name(dev2), dev2->irq, irq);
994#endif 984#endif
995 continue; 985 continue;
996 } 986 }
997 dev2->irq = irq; 987 dev2->irq = irq;
998 pirq_penalty[irq]++; 988 pirq_penalty[irq]++;
999 if (dev != dev2) 989 if (dev != dev2)
@@ -1031,8 +1021,7 @@ static void __init pcibios_fixup_irqs(void)
1031 /* 1021 /*
1032 * Recalculate IRQ numbers if we use the I/O APIC. 1022 * Recalculate IRQ numbers if we use the I/O APIC.
1033 */ 1023 */
1034 if (io_apic_assign_pci_irqs) 1024 if (io_apic_assign_pci_irqs) {
1035 {
1036 int irq; 1025 int irq;
1037 1026
1038 if (pin) { 1027 if (pin) {
@@ -1045,10 +1034,10 @@ static void __init pcibios_fixup_irqs(void)
1045 * busses itself so we should get into this branch reliably. 1034 * busses itself so we should get into this branch reliably.
1046 */ 1035 */
1047 if (irq < 0 && dev->bus->parent) { /* go back to the bridge */ 1036 if (irq < 0 && dev->bus->parent) { /* go back to the bridge */
1048 struct pci_dev * bridge = dev->bus->self; 1037 struct pci_dev *bridge = dev->bus->self;
1049 1038
1050 pin = (pin + PCI_SLOT(dev->devfn)) % 4; 1039 pin = (pin + PCI_SLOT(dev->devfn)) % 4;
1051 irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number, 1040 irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number,
1052 PCI_SLOT(bridge->devfn), pin); 1041 PCI_SLOT(bridge->devfn), pin);
1053 if (irq >= 0) 1042 if (irq >= 0)
1054 printk(KERN_WARNING "PCI: using PPB %s[%c] to get irq %d\n", 1043 printk(KERN_WARNING "PCI: using PPB %s[%c] to get irq %d\n",
@@ -1118,7 +1107,7 @@ static struct dmi_system_id __initdata pciirq_dmi_table[] = {
1118 { } 1107 { }
1119}; 1108};
1120 1109
1121static int __init pcibios_irq_init(void) 1110int __init pcibios_irq_init(void)
1122{ 1111{
1123 DBG(KERN_DEBUG "PCI: IRQ init\n"); 1112 DBG(KERN_DEBUG "PCI: IRQ init\n");
1124 1113
@@ -1138,7 +1127,7 @@ static int __init pcibios_irq_init(void)
1138 pirq_find_router(&pirq_router); 1127 pirq_find_router(&pirq_router);
1139 if (pirq_table->exclusive_irqs) { 1128 if (pirq_table->exclusive_irqs) {
1140 int i; 1129 int i;
1141 for (i=0; i<16; i++) 1130 for (i = 0; i < 16; i++)
1142 if (!(pirq_table->exclusive_irqs & (1 << i))) 1131 if (!(pirq_table->exclusive_irqs & (1 << i)))
1143 pirq_penalty[i] += 100; 1132 pirq_penalty[i] += 100;
1144 } 1133 }
@@ -1153,9 +1142,6 @@ static int __init pcibios_irq_init(void)
1153 return 0; 1142 return 0;
1154} 1143}
1155 1144
1156subsys_initcall(pcibios_irq_init);
1157
1158
1159static void pirq_penalize_isa_irq(int irq, int active) 1145static void pirq_penalize_isa_irq(int irq, int active)
1160{ 1146{
1161 /* 1147 /*
@@ -1203,10 +1189,10 @@ static int pirq_enable_irq(struct pci_dev *dev)
1203 */ 1189 */
1204 temp_dev = dev; 1190 temp_dev = dev;
1205 while (irq < 0 && dev->bus->parent) { /* go back to the bridge */ 1191 while (irq < 0 && dev->bus->parent) { /* go back to the bridge */
1206 struct pci_dev * bridge = dev->bus->self; 1192 struct pci_dev *bridge = dev->bus->self;
1207 1193
1208 pin = (pin + PCI_SLOT(dev->devfn)) % 4; 1194 pin = (pin + PCI_SLOT(dev->devfn)) % 4;
1209 irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number, 1195 irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number,
1210 PCI_SLOT(bridge->devfn), pin); 1196 PCI_SLOT(bridge->devfn), pin);
1211 if (irq >= 0) 1197 if (irq >= 0)
1212 printk(KERN_WARNING "PCI: using PPB %s[%c] to get irq %d\n", 1198 printk(KERN_WARNING "PCI: using PPB %s[%c] to get irq %d\n",
diff --git a/arch/x86/pci/legacy.c b/arch/x86/pci/legacy.c
index a67921ce60af..132876cc6fca 100644
--- a/arch/x86/pci/legacy.c
+++ b/arch/x86/pci/legacy.c
@@ -55,4 +55,18 @@ static int __init pci_legacy_init(void)
55 return 0; 55 return 0;
56} 56}
57 57
58subsys_initcall(pci_legacy_init); 58int __init pci_subsys_init(void)
59{
60#ifdef CONFIG_ACPI
61 pci_acpi_init();
62#endif
63 pci_legacy_init();
64 pcibios_irq_init();
65#ifdef CONFIG_X86_NUMAQ
66 pci_numa_init();
67#endif
68 pcibios_init();
69
70 return 0;
71}
72subsys_initcall(pci_subsys_init);
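The point of folding the former subsys_initcalls into one pci_subsys_init(): initcalls registered at the same level run in link order, so the old sequencing was implicit in the Makefile; a single caller makes ACPI-before-legacy-before-IRQ-fixup explicit. The pattern in minimal, hypothetical form:

	/* before: relative order decided by link order */
	subsys_initcall(foo_init);
	subsys_initcall(bar_init);

	/* after: one initcall, explicit order */
	static int __init foobar_init(void)
	{
		foo_init();
		bar_init();
		return 0;
	}
	subsys_initcall(foobar_init);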
diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c
index 0cfebecf2a8f..23faaa890ffc 100644
--- a/arch/x86/pci/mmconfig-shared.c
+++ b/arch/x86/pci/mmconfig-shared.c
@@ -374,7 +374,7 @@ reject:
374 374
375static int __initdata known_bridge; 375static int __initdata known_bridge;
376 376
377void __init __pci_mmcfg_init(int early) 377static void __init __pci_mmcfg_init(int early)
378{ 378{
379 /* MMCONFIG disabled */ 379 /* MMCONFIG disabled */
380 if ((pci_probe & PCI_PROBE_MMCONF) == 0) 380 if ((pci_probe & PCI_PROBE_MMCONF) == 0)
diff --git a/arch/x86/pci/mp_bus_to_node.c b/arch/x86/pci/mp_bus_to_node.c
deleted file mode 100644
index 022943999b84..000000000000
--- a/arch/x86/pci/mp_bus_to_node.c
+++ /dev/null
@@ -1,23 +0,0 @@
1#include <linux/pci.h>
2#include <linux/init.h>
3#include <linux/topology.h>
4
5#define BUS_NR 256
6
7static unsigned char mp_bus_to_node[BUS_NR];
8
9void set_mp_bus_to_node(int busnum, int node)
10{
11 if (busnum >= 0 && busnum < BUS_NR)
12 mp_bus_to_node[busnum] = (unsigned char) node;
13}
14
15int get_mp_bus_to_node(int busnum)
16{
17 int node;
18
19 if (busnum < 0 || busnum > (BUS_NR - 1))
20 return 0;
21 node = mp_bus_to_node[busnum];
22 return node;
23}
diff --git a/arch/x86/pci/numa.c b/arch/x86/pci/numa.c
index d9afbae5092b..8b5ca1966731 100644
--- a/arch/x86/pci/numa.c
+++ b/arch/x86/pci/numa.c
@@ -6,45 +6,21 @@
6#include <linux/init.h> 6#include <linux/init.h>
7#include <linux/nodemask.h> 7#include <linux/nodemask.h>
8#include <mach_apic.h> 8#include <mach_apic.h>
9#include <asm/mpspec.h>
9#include "pci.h" 10#include "pci.h"
10 11
11#define XQUAD_PORTIO_BASE 0xfe400000 12#define XQUAD_PORTIO_BASE 0xfe400000
12#define XQUAD_PORTIO_QUAD 0x40000 /* 256k per quad. */ 13#define XQUAD_PORTIO_QUAD 0x40000 /* 256k per quad. */
13 14
14int mp_bus_id_to_node[MAX_MP_BUSSES];
15#define BUS2QUAD(global) (mp_bus_id_to_node[global]) 15#define BUS2QUAD(global) (mp_bus_id_to_node[global])
16 16
17int mp_bus_id_to_local[MAX_MP_BUSSES];
18#define BUS2LOCAL(global) (mp_bus_id_to_local[global]) 17#define BUS2LOCAL(global) (mp_bus_id_to_local[global])
19 18
20void mpc_oem_bus_info(struct mpc_config_bus *m, char *name,
21 struct mpc_config_translation *translation)
22{
23 int quad = translation->trans_quad;
24 int local = translation->trans_local;
25
26 mp_bus_id_to_node[m->mpc_busid] = quad;
27 mp_bus_id_to_local[m->mpc_busid] = local;
28 printk(KERN_INFO "Bus #%d is %s (node %d)\n",
29 m->mpc_busid, name, quad);
30}
31
32int quad_local_to_mp_bus_id [NR_CPUS/4][4];
33#define QUADLOCAL2BUS(quad,local) (quad_local_to_mp_bus_id[quad][local]) 19#define QUADLOCAL2BUS(quad,local) (quad_local_to_mp_bus_id[quad][local])
34void mpc_oem_pci_bus(struct mpc_config_bus *m,
35 struct mpc_config_translation *translation)
36{
37 int quad = translation->trans_quad;
38 int local = translation->trans_local;
39
40 quad_local_to_mp_bus_id[quad][local] = m->mpc_busid;
41}
42 20
43/* Where the IO area was mapped on multiquad, always 0 otherwise */ 21/* Where the IO area was mapped on multiquad, always 0 otherwise */
44void *xquad_portio; 22void *xquad_portio;
45#ifdef CONFIG_X86_NUMAQ
46EXPORT_SYMBOL(xquad_portio); 23EXPORT_SYMBOL(xquad_portio);
47#endif
48 24
49#define XQUAD_PORT_ADDR(port, quad) (xquad_portio + (XQUAD_PORTIO_QUAD*quad) + port) 25#define XQUAD_PORT_ADDR(port, quad) (xquad_portio + (XQUAD_PORTIO_QUAD*quad) + port)
50 26
@@ -175,10 +151,13 @@ static void __devinit pci_fixup_i450nx(struct pci_dev *d)
175} 151}
176DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82451NX, pci_fixup_i450nx); 152DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82451NX, pci_fixup_i450nx);
177 153
178static int __init pci_numa_init(void) 154int __init pci_numa_init(void)
179{ 155{
180 int quad; 156 int quad;
181 157
158 if (!found_numaq)
159 return 0;
160
182 raw_pci_ops = &pci_direct_conf1_mq; 161 raw_pci_ops = &pci_direct_conf1_mq;
183 162
184 if (pcibios_scanned++) 163 if (pcibios_scanned++)
@@ -197,5 +176,3 @@ static int __init pci_numa_init(void)
197 } 176 }
198 return 0; 177 return 0;
199} 178}
200
201subsys_initcall(pci_numa_init);
diff --git a/arch/x86/pci/pci.h b/arch/x86/pci/pci.h
index 720c4c554534..b2270a55b0cf 100644
--- a/arch/x86/pci/pci.h
+++ b/arch/x86/pci/pci.h
@@ -27,6 +27,7 @@
27#define PCI_CAN_SKIP_ISA_ALIGN 0x8000 27#define PCI_CAN_SKIP_ISA_ALIGN 0x8000
28#define PCI_USE__CRS 0x10000 28#define PCI_USE__CRS 0x10000
29#define PCI_CHECK_ENABLE_AMD_MMCONF 0x20000 29#define PCI_CHECK_ENABLE_AMD_MMCONF 0x20000
30#define PCI_HAS_IO_ECS 0x40000
30 31
31extern unsigned int pci_probe; 32extern unsigned int pci_probe;
32extern unsigned long pirq_table_addr; 33extern unsigned long pirq_table_addr;
@@ -38,9 +39,6 @@ enum pci_bf_sort_state {
38 pci_dmi_bf, 39 pci_dmi_bf,
39}; 40};
40 41
41extern void __init dmi_check_pciprobe(void);
42extern void __init dmi_check_skip_isa_align(void);
43
44/* pci-i386.c */ 42/* pci-i386.c */
45 43
46extern unsigned int pcibios_max_latency; 44extern unsigned int pcibios_max_latency;
@@ -98,10 +96,19 @@ extern struct pci_raw_ops *raw_pci_ext_ops;
98 96
99extern struct pci_raw_ops pci_direct_conf1; 97extern struct pci_raw_ops pci_direct_conf1;
100 98
99/* arch_initcall level */
101extern int pci_direct_probe(void); 100extern int pci_direct_probe(void);
102extern void pci_direct_init(int type); 101extern void pci_direct_init(int type);
103extern void pci_pcbios_init(void); 102extern void pci_pcbios_init(void);
104extern int pci_olpc_init(void); 103extern int pci_olpc_init(void);
104extern void __init dmi_check_pciprobe(void);
105extern void __init dmi_check_skip_isa_align(void);
106
107/* some common used subsys_initcalls */
108extern int __init pci_acpi_init(void);
109extern int __init pcibios_irq_init(void);
110extern int __init pci_numa_init(void);
111extern int __init pcibios_init(void);
105 112
106/* pci-mmconfig.c */ 113/* pci-mmconfig.c */
107 114
diff --git a/arch/x86/pci/visws.c b/arch/x86/pci/visws.c
index c2df4e97eed6..1a7bed492bb1 100644
--- a/arch/x86/pci/visws.c
+++ b/arch/x86/pci/visws.c
@@ -8,18 +8,19 @@
8#include <linux/pci.h> 8#include <linux/pci.h>
9#include <linux/init.h> 9#include <linux/init.h>
10 10
11#include "cobalt.h" 11#include <asm/setup.h>
12#include "lithium.h" 12#include <asm/visws/cobalt.h>
13#include <asm/visws/lithium.h>
13 14
14#include "pci.h" 15#include "pci.h"
15 16
16static int pci_visws_enable_irq(struct pci_dev *dev) { return 0; } 17static int pci_visws_enable_irq(struct pci_dev *dev) { return 0; }
17static void pci_visws_disable_irq(struct pci_dev *dev) { } 18static void pci_visws_disable_irq(struct pci_dev *dev) { }
18 19
19int (*pcibios_enable_irq)(struct pci_dev *dev) = &pci_visws_enable_irq; 20/* int (*pcibios_enable_irq)(struct pci_dev *dev) = &pci_visws_enable_irq; */
20void (*pcibios_disable_irq)(struct pci_dev *dev) = &pci_visws_disable_irq; 21/* void (*pcibios_disable_irq)(struct pci_dev *dev) = &pci_visws_disable_irq; */
21 22
22void __init pcibios_penalize_isa_irq(int irq, int active) {} 23/* void __init pcibios_penalize_isa_irq(int irq, int active) {} */
23 24
24 25
25unsigned int pci_bus0, pci_bus1; 26unsigned int pci_bus0, pci_bus1;
@@ -85,7 +86,7 @@ void __init pcibios_update_irq(struct pci_dev *dev, int irq)
85 pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq); 86 pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
86} 87}
87 88
88static int __init pcibios_init(void) 89static int __init pci_visws_init(void)
89{ 90{
90 /* The VISWS supports configuration access type 1 only */ 91 /* The VISWS supports configuration access type 1 only */
91 pci_probe = (pci_probe | PCI_PROBE_CONF1) & 92 pci_probe = (pci_probe | PCI_PROBE_CONF1) &
@@ -105,4 +106,17 @@ static int __init pcibios_init(void)
105 return 0; 106 return 0;
106} 107}
107 108
108subsys_initcall(pcibios_init); 109static __init int pci_subsys_init(void)
110{
111 if (!is_visws_box())
112 return -1;
113
114 pcibios_enable_irq = &pci_visws_enable_irq;
115 pcibios_disable_irq = &pci_visws_disable_irq;
116
117 pci_visws_init();
118 pcibios_init();
119
120 return 0;
121}
122subsys_initcall(pci_subsys_init);
diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c
index b542355e0e34..6dd000dd7933 100644
--- a/arch/x86/power/hibernate_64.c
+++ b/arch/x86/power/hibernate_64.c
@@ -83,7 +83,7 @@ static int set_up_temporary_mappings(void)
83 83
84 /* Set up the direct mapping from scratch */ 84 /* Set up the direct mapping from scratch */
85 start = (unsigned long)pfn_to_kaddr(0); 85 start = (unsigned long)pfn_to_kaddr(0);
86 end = (unsigned long)pfn_to_kaddr(end_pfn); 86 end = (unsigned long)pfn_to_kaddr(max_pfn);
87 87
88 for (; start < end; start = next) { 88 for (; start < end; start = next) {
89 pud_t *pud = (pud_t *)get_safe_page(GFP_ATOMIC); 89 pud_t *pud = (pud_t *)get_safe_page(GFP_ATOMIC);
diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c
index cf058fecfcee..0bce5429a515 100644
--- a/arch/x86/vdso/vdso32-setup.c
+++ b/arch/x86/vdso/vdso32-setup.c
@@ -203,20 +203,11 @@ static struct page *vdso32_pages[1];
203 203
204#ifdef CONFIG_X86_64 204#ifdef CONFIG_X86_64
205 205
206static int use_sysenter __read_mostly = -1; 206#define vdso32_sysenter() (boot_cpu_has(X86_FEATURE_SYSENTER32))
207
208#define vdso32_sysenter() (use_sysenter > 0)
209 207
210/* May not be __init: called during resume */ 208/* May not be __init: called during resume */
211void syscall32_cpu_init(void) 209void syscall32_cpu_init(void)
212{ 210{
213 if (use_sysenter < 0) {
214 if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
215 use_sysenter = 1;
216 if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR)
217 use_sysenter = 1;
218 }
219
220 /* Load these always in case some future AMD CPU supports 211 /* Load these always in case some future AMD CPU supports
221 SYSENTER from compat mode too. */ 212 SYSENTER from compat mode too. */
222 checking_wrmsrl(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS); 213 checking_wrmsrl(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
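The removed lazy use_sysenter probe becomes a test of the synthetic feature bit X86_FEATURE_SYSENTER32, which is assumed to be set once during vendor CPU setup elsewhere in this series; that also keeps the check valid across suspend/resume. A sketch of the assumed setup-side counterpart:

	/* assumed, not shown in this hunk: done at CPU identification */
	if (c->x86_vendor == X86_VENDOR_INTEL ||
	    c->x86_vendor == X86_VENDOR_CENTAUR)
		set_cpu_cap(c, X86_FEATURE_SYSENTER32);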
diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c
index 3fdd51497a83..19a6cfaf5db9 100644
--- a/arch/x86/vdso/vma.c
+++ b/arch/x86/vdso/vma.c
@@ -16,7 +16,7 @@
16#include "vextern.h" /* Just for VMAGIC. */ 16#include "vextern.h" /* Just for VMAGIC. */
17#undef VEXTERN 17#undef VEXTERN
18 18
19int vdso_enabled = 1; 19unsigned int __read_mostly vdso_enabled = 1;
20 20
21extern char vdso_start[], vdso_end[]; 21extern char vdso_start[], vdso_end[];
22extern unsigned short vdso_sync_cpuid; 22extern unsigned short vdso_sync_cpuid;
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index 6c388e593bc8..c2cc99580871 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -12,3 +12,13 @@ config XEN
12 This is the Linux Xen port. Enabling this will allow the 12 This is the Linux Xen port. Enabling this will allow the
13 kernel to boot in a paravirtualized environment under the 13 kernel to boot in a paravirtualized environment under the
14 Xen hypervisor. 14 Xen hypervisor.
15
16config XEN_MAX_DOMAIN_MEMORY
17 int "Maximum allowed size of a domain in gigabytes"
18 default 8
19 depends on XEN
20 help
21 The pseudo-physical to machine address array is sized
22 according to the maximum possible memory size of a Xen
23 domain. This array uses 1 page per gigabyte, so there's no
24 need to be too stingy here. \ No newline at end of file
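A back-of-envelope sketch of where the help text's one-page-per-gigabyte figure comes from, assuming 4 KiB pages and 64-bit (8-byte) entries; the 32-bit cost is smaller:

	/* entries per p2m page    = 4096 / 8       = 512
	 * guest memory per page   = 512 * 4 KiB    = 2 MiB
	 * p2m pages per GiB       = 1 GiB / 2 MiB  = 512
	 * top-level array per GiB = 512 * 8 bytes  = 4 KiB = one page
	 */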
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
index 3d8df981d5fd..2ba2d1649131 100644
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -1,4 +1,4 @@
1obj-y := enlighten.o setup.o multicalls.o mmu.o \ 1obj-y := enlighten.o setup.o multicalls.o mmu.o \
2 time.o manage.o xen-asm.o grant-table.o 2 time.o xen-asm.o grant-table.o suspend.o
3 3
4obj-$(CONFIG_SMP) += smp.o 4obj-$(CONFIG_SMP) += smp.o
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index f09c1c69c37a..dcd4e51f2f16 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -45,6 +45,7 @@
45#include <asm/pgtable.h> 45#include <asm/pgtable.h>
46#include <asm/tlbflush.h> 46#include <asm/tlbflush.h>
47#include <asm/reboot.h> 47#include <asm/reboot.h>
48#include <asm/pgalloc.h>
48 49
49#include "xen-ops.h" 50#include "xen-ops.h"
50#include "mmu.h" 51#include "mmu.h"
@@ -75,13 +76,13 @@ DEFINE_PER_CPU(unsigned long, xen_current_cr3); /* actual vcpu cr3 */
75struct start_info *xen_start_info; 76struct start_info *xen_start_info;
76EXPORT_SYMBOL_GPL(xen_start_info); 77EXPORT_SYMBOL_GPL(xen_start_info);
77 78
78static /* __initdata */ struct shared_info dummy_shared_info; 79struct shared_info xen_dummy_shared_info;
79 80
80/* 81/*
81 * Point at some empty memory to start with. We map the real shared_info 82 * Point at some empty memory to start with. We map the real shared_info
82 * page as soon as fixmap is up and running. 83 * page as soon as fixmap is up and running.
83 */ 84 */
84struct shared_info *HYPERVISOR_shared_info = (void *)&dummy_shared_info; 85struct shared_info *HYPERVISOR_shared_info = (void *)&xen_dummy_shared_info;
85 86
86/* 87/*
87 * Flag to determine whether vcpu info placement is available on all 88 * Flag to determine whether vcpu info placement is available on all
@@ -98,13 +99,13 @@ struct shared_info *HYPERVISOR_shared_info = (void *)&dummy_shared_info;
98 */ 99 */
99static int have_vcpu_info_placement = 1; 100static int have_vcpu_info_placement = 1;
100 101
101static void __init xen_vcpu_setup(int cpu) 102static void xen_vcpu_setup(int cpu)
102{ 103{
103 struct vcpu_register_vcpu_info info; 104 struct vcpu_register_vcpu_info info;
104 int err; 105 int err;
105 struct vcpu_info *vcpup; 106 struct vcpu_info *vcpup;
106 107
107 BUG_ON(HYPERVISOR_shared_info == &dummy_shared_info); 108 BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info);
108 per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu]; 109 per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
109 110
110 if (!have_vcpu_info_placement) 111 if (!have_vcpu_info_placement)
@@ -136,11 +137,41 @@ static void __init xen_vcpu_setup(int cpu)
136 } 137 }
137} 138}
138 139
140/*
141 * On restore, set the vcpu placement up again.
142 * If it fails, then we're in a bad state, since
143 * we can't back out from using it...
144 */
145void xen_vcpu_restore(void)
146{
147 if (have_vcpu_info_placement) {
148 int cpu;
149
150 for_each_online_cpu(cpu) {
151 bool other_cpu = (cpu != smp_processor_id());
152
153 if (other_cpu &&
154 HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL))
155 BUG();
156
157 xen_vcpu_setup(cpu);
158
159 if (other_cpu &&
160 HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL))
161 BUG();
162 }
163
164 BUG_ON(!have_vcpu_info_placement);
165 }
166}
167
139static void __init xen_banner(void) 168static void __init xen_banner(void)
140{ 169{
141 printk(KERN_INFO "Booting paravirtualized kernel on %s\n", 170 printk(KERN_INFO "Booting paravirtualized kernel on %s\n",
142 pv_info.name); 171 pv_info.name);
143 printk(KERN_INFO "Hypervisor signature: %s\n", xen_start_info->magic); 172 printk(KERN_INFO "Hypervisor signature: %s%s\n",
173 xen_start_info->magic,
174 xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : "");
144} 175}
145 176
146static void xen_cpuid(unsigned int *ax, unsigned int *bx, 177static void xen_cpuid(unsigned int *ax, unsigned int *bx,
@@ -235,13 +266,13 @@ static void xen_irq_enable(void)
235{ 266{
236 struct vcpu_info *vcpu; 267 struct vcpu_info *vcpu;
237 268
238 /* There's a one instruction preempt window here. We need to 269 /* We don't need to worry about being preempted here, since
239 make sure we're don't switch CPUs between getting the vcpu 270 either a) interrupts are disabled, so no preemption, or b)
240 pointer and updating the mask. */ 271 the caller is confused and is trying to re-enable interrupts
241 preempt_disable(); 272 on an indeterminate processor. */
273
242 vcpu = x86_read_percpu(xen_vcpu); 274 vcpu = x86_read_percpu(xen_vcpu);
243 vcpu->evtchn_upcall_mask = 0; 275 vcpu->evtchn_upcall_mask = 0;
244 preempt_enable_no_resched();
245 276
246 /* Doesn't matter if we get preempted here, because any 277 /* Doesn't matter if we get preempted here, because any
247 pending event will get dealt with anyway. */ 278 pending event will get dealt with anyway. */
@@ -254,7 +285,7 @@ static void xen_irq_enable(void)
254static void xen_safe_halt(void) 285static void xen_safe_halt(void)
255{ 286{
256 /* Blocking includes an implicit local_irq_enable(). */ 287 /* Blocking includes an implicit local_irq_enable(). */
257 if (HYPERVISOR_sched_op(SCHEDOP_block, 0) != 0) 288 if (HYPERVISOR_sched_op(SCHEDOP_block, NULL) != 0)
258 BUG(); 289 BUG();
259} 290}
260 291
@@ -607,6 +638,30 @@ static void xen_flush_tlb_others(const cpumask_t *cpus, struct mm_struct *mm,
607 xen_mc_issue(PARAVIRT_LAZY_MMU); 638 xen_mc_issue(PARAVIRT_LAZY_MMU);
608} 639}
609 640
641static void xen_clts(void)
642{
643 struct multicall_space mcs;
644
645 mcs = xen_mc_entry(0);
646
647 MULTI_fpu_taskswitch(mcs.mc, 0);
648
649 xen_mc_issue(PARAVIRT_LAZY_CPU);
650}
651
652static void xen_write_cr0(unsigned long cr0)
653{
654 struct multicall_space mcs;
655
656 /* Only pay attention to cr0.TS; everything else is
657 ignored. */
658 mcs = xen_mc_entry(0);
659
660 MULTI_fpu_taskswitch(mcs.mc, (cr0 & X86_CR0_TS) != 0);
661
662 xen_mc_issue(PARAVIRT_LAZY_CPU);
663}
664
610static void xen_write_cr2(unsigned long cr2) 665static void xen_write_cr2(unsigned long cr2)
611{ 666{
612 x86_read_percpu(xen_vcpu)->arch.cr2 = cr2; 667 x86_read_percpu(xen_vcpu)->arch.cr2 = cr2;
@@ -624,8 +679,10 @@ static unsigned long xen_read_cr2_direct(void)
624 679
625static void xen_write_cr4(unsigned long cr4) 680static void xen_write_cr4(unsigned long cr4)
626{ 681{
627 /* Just ignore cr4 changes; Xen doesn't allow us to do 682 cr4 &= ~X86_CR4_PGE;
628 anything anyway. */ 683 cr4 &= ~X86_CR4_PSE;
684
685 native_write_cr4(cr4);
629} 686}
630 687
631static unsigned long xen_read_cr3(void) 688static unsigned long xen_read_cr3(void)
@@ -831,7 +888,7 @@ static __init void xen_pagetable_setup_start(pgd_t *base)
831 PFN_DOWN(__pa(xen_start_info->pt_base))); 888 PFN_DOWN(__pa(xen_start_info->pt_base)));
832} 889}
833 890
834static __init void setup_shared_info(void) 891void xen_setup_shared_info(void)
835{ 892{
836 if (!xen_feature(XENFEAT_auto_translated_physmap)) { 893 if (!xen_feature(XENFEAT_auto_translated_physmap)) {
837 unsigned long addr = fix_to_virt(FIX_PARAVIRT_BOOTMAP); 894 unsigned long addr = fix_to_virt(FIX_PARAVIRT_BOOTMAP);
@@ -854,6 +911,8 @@ static __init void setup_shared_info(void)
854 /* In UP this is as good a place as any to set up shared info */ 911 /* In UP this is as good a place as any to set up shared info */
855 xen_setup_vcpu_info_placement(); 912 xen_setup_vcpu_info_placement();
856#endif 913#endif
914
915 xen_setup_mfn_list_list();
857} 916}
858 917
859static __init void xen_pagetable_setup_done(pgd_t *base) 918static __init void xen_pagetable_setup_done(pgd_t *base)
@@ -866,15 +925,23 @@ static __init void xen_pagetable_setup_done(pgd_t *base)
866 pv_mmu_ops.release_pmd = xen_release_pmd; 925 pv_mmu_ops.release_pmd = xen_release_pmd;
867 pv_mmu_ops.set_pte = xen_set_pte; 926 pv_mmu_ops.set_pte = xen_set_pte;
868 927
869 setup_shared_info(); 928 xen_setup_shared_info();
870 929
871 /* Actually pin the pagetable down, but we can't set PG_pinned 930 /* Actually pin the pagetable down, but we can't set PG_pinned
872 yet because the page structures don't exist yet. */ 931 yet because the page structures don't exist yet. */
873 pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(base))); 932 pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(base)));
874} 933}
875 934
935static __init void xen_post_allocator_init(void)
936{
937 pv_mmu_ops.set_pmd = xen_set_pmd;
938 pv_mmu_ops.set_pud = xen_set_pud;
939
940 xen_mark_init_mm_pinned();
941}
942
876/* This is called once we have the cpu_possible_map */ 943/* This is called once we have the cpu_possible_map */
877void __init xen_setup_vcpu_info_placement(void) 944void xen_setup_vcpu_info_placement(void)
878{ 945{
879 int cpu; 946 int cpu;
880 947
@@ -947,6 +1014,33 @@ static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf,
947 return ret; 1014 return ret;
948} 1015}
949 1016
1017static void xen_set_fixmap(unsigned idx, unsigned long phys, pgprot_t prot)
1018{
1019 pte_t pte;
1020
1021 phys >>= PAGE_SHIFT;
1022
1023 switch (idx) {
1024 case FIX_BTMAP_END ... FIX_BTMAP_BEGIN:
1025#ifdef CONFIG_X86_F00F_BUG
1026 case FIX_F00F_IDT:
1027#endif
1028 case FIX_WP_TEST:
1029 case FIX_VDSO:
1030#ifdef CONFIG_X86_LOCAL_APIC
1031 case FIX_APIC_BASE: /* maps dummy local APIC */
1032#endif
1033 pte = pfn_pte(phys, prot);
1034 break;
1035
1036 default:
1037 pte = mfn_pte(phys, prot);
1038 break;
1039 }
1040
1041 __native_set_fixmap(idx, pte);
1042}
1043
950static const struct pv_info xen_info __initdata = { 1044static const struct pv_info xen_info __initdata = {
951 .paravirt_enabled = 1, 1045 .paravirt_enabled = 1,
952 .shared_kernel_pmd = 0, 1046 .shared_kernel_pmd = 0,
@@ -960,7 +1054,7 @@ static const struct pv_init_ops xen_init_ops __initdata = {
960 .banner = xen_banner, 1054 .banner = xen_banner,
961 .memory_setup = xen_memory_setup, 1055 .memory_setup = xen_memory_setup,
962 .arch_setup = xen_arch_setup, 1056 .arch_setup = xen_arch_setup,
963 .post_allocator_init = xen_mark_init_mm_pinned, 1057 .post_allocator_init = xen_post_allocator_init,
964}; 1058};
965 1059
966static const struct pv_time_ops xen_time_ops __initdata = { 1060static const struct pv_time_ops xen_time_ops __initdata = {
@@ -968,7 +1062,7 @@ static const struct pv_time_ops xen_time_ops __initdata = {
968 1062
969 .set_wallclock = xen_set_wallclock, 1063 .set_wallclock = xen_set_wallclock,
970 .get_wallclock = xen_get_wallclock, 1064 .get_wallclock = xen_get_wallclock,
971 .get_cpu_khz = xen_cpu_khz, 1065 .get_tsc_khz = xen_tsc_khz,
972 .sched_clock = xen_sched_clock, 1066 .sched_clock = xen_sched_clock,
973}; 1067};
974 1068
@@ -978,10 +1072,10 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
978 .set_debugreg = xen_set_debugreg, 1072 .set_debugreg = xen_set_debugreg,
979 .get_debugreg = xen_get_debugreg, 1073 .get_debugreg = xen_get_debugreg,
980 1074
981 .clts = native_clts, 1075 .clts = xen_clts,
982 1076
983 .read_cr0 = native_read_cr0, 1077 .read_cr0 = native_read_cr0,
984 .write_cr0 = native_write_cr0, 1078 .write_cr0 = xen_write_cr0,
985 1079
986 .read_cr4 = native_read_cr4, 1080 .read_cr4 = native_read_cr4,
987 .read_cr4_safe = native_read_cr4_safe, 1081 .read_cr4_safe = native_read_cr4_safe,
@@ -995,7 +1089,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
995 .read_pmc = native_read_pmc, 1089 .read_pmc = native_read_pmc,
996 1090
997 .iret = xen_iret, 1091 .iret = xen_iret,
998 .irq_enable_syscall_ret = xen_sysexit, 1092 .irq_enable_sysexit = xen_sysexit,
999 1093
1000 .load_tr_desc = paravirt_nop, 1094 .load_tr_desc = paravirt_nop,
1001 .set_ldt = xen_set_ldt, 1095 .set_ldt = xen_set_ldt,
@@ -1029,6 +1123,9 @@ static const struct pv_irq_ops xen_irq_ops __initdata = {
1029 .irq_enable = xen_irq_enable, 1123 .irq_enable = xen_irq_enable,
1030 .safe_halt = xen_safe_halt, 1124 .safe_halt = xen_safe_halt,
1031 .halt = xen_halt, 1125 .halt = xen_halt,
1126#ifdef CONFIG_X86_64
1127 .adjust_exception_frame = paravirt_nop,
1128#endif
1032}; 1129};
1033 1130
1034static const struct pv_apic_ops xen_apic_ops __initdata = { 1131static const struct pv_apic_ops xen_apic_ops __initdata = {
@@ -1060,6 +1157,9 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
1060 .pte_update = paravirt_nop, 1157 .pte_update = paravirt_nop,
1061 .pte_update_defer = paravirt_nop, 1158 .pte_update_defer = paravirt_nop,
1062 1159
1160 .pgd_alloc = __paravirt_pgd_alloc,
1161 .pgd_free = paravirt_nop,
1162
1063 .alloc_pte = xen_alloc_pte_init, 1163 .alloc_pte = xen_alloc_pte_init,
1064 .release_pte = xen_release_pte_init, 1164 .release_pte = xen_release_pte_init,
1065 .alloc_pmd = xen_alloc_pte_init, 1165 .alloc_pmd = xen_alloc_pte_init,
@@ -1072,9 +1172,13 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
1072 1172
1073 .set_pte = NULL, /* see xen_pagetable_setup_* */ 1173 .set_pte = NULL, /* see xen_pagetable_setup_* */
1074 .set_pte_at = xen_set_pte_at, 1174 .set_pte_at = xen_set_pte_at,
1075 .set_pmd = xen_set_pmd, 1175 .set_pmd = xen_set_pmd_hyper,
1176
1177 .ptep_modify_prot_start = __ptep_modify_prot_start,
1178 .ptep_modify_prot_commit = __ptep_modify_prot_commit,
1076 1179
1077 .pte_val = xen_pte_val, 1180 .pte_val = xen_pte_val,
1181 .pte_flags = native_pte_val,
1078 .pgd_val = xen_pgd_val, 1182 .pgd_val = xen_pgd_val,
1079 1183
1080 .make_pte = xen_make_pte, 1184 .make_pte = xen_make_pte,
@@ -1082,7 +1186,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
1082 1186
1083 .set_pte_atomic = xen_set_pte_atomic, 1187 .set_pte_atomic = xen_set_pte_atomic,
1084 .set_pte_present = xen_set_pte_at, 1188 .set_pte_present = xen_set_pte_at,
1085 .set_pud = xen_set_pud, 1189 .set_pud = xen_set_pud_hyper,
1086 .pte_clear = xen_pte_clear, 1190 .pte_clear = xen_pte_clear,
1087 .pmd_clear = xen_pmd_clear, 1191 .pmd_clear = xen_pmd_clear,
1088 1192
@@ -1097,6 +1201,8 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
1097 .enter = paravirt_enter_lazy_mmu, 1201 .enter = paravirt_enter_lazy_mmu,
1098 .leave = xen_leave_lazy, 1202 .leave = xen_leave_lazy,
1099 }, 1203 },
1204
1205 .set_fixmap = xen_set_fixmap,
1100}; 1206};
1101 1207
1102#ifdef CONFIG_SMP 1208#ifdef CONFIG_SMP
@@ -1114,11 +1220,13 @@ static const struct smp_ops xen_smp_ops __initdata = {
1114 1220
1115static void xen_reboot(int reason) 1221static void xen_reboot(int reason)
1116{ 1222{
1223 struct sched_shutdown r = { .reason = reason };
1224
1117#ifdef CONFIG_SMP 1225#ifdef CONFIG_SMP
1118 smp_send_stop(); 1226 smp_send_stop();
1119#endif 1227#endif
1120 1228
1121 if (HYPERVISOR_sched_op(SCHEDOP_shutdown, reason)) 1229 if (HYPERVISOR_sched_op(SCHEDOP_shutdown, &r))
1122 BUG(); 1230 BUG();
1123} 1231}
1124 1232
@@ -1173,6 +1281,8 @@ asmlinkage void __init xen_start_kernel(void)
1173 1281
1174 BUG_ON(memcmp(xen_start_info->magic, "xen-3", 5) != 0); 1282 BUG_ON(memcmp(xen_start_info->magic, "xen-3", 5) != 0);
1175 1283
1284 xen_setup_features();
1285
1176 /* Install Xen paravirt ops */ 1286 /* Install Xen paravirt ops */
1177 pv_info = xen_info; 1287 pv_info = xen_info;
1178 pv_init_ops = xen_init_ops; 1288 pv_init_ops = xen_init_ops;
@@ -1182,21 +1292,26 @@ asmlinkage void __init xen_start_kernel(void)
1182 pv_apic_ops = xen_apic_ops; 1292 pv_apic_ops = xen_apic_ops;
1183 pv_mmu_ops = xen_mmu_ops; 1293 pv_mmu_ops = xen_mmu_ops;
1184 1294
1295 if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) {
1296 pv_mmu_ops.ptep_modify_prot_start = xen_ptep_modify_prot_start;
1297 pv_mmu_ops.ptep_modify_prot_commit = xen_ptep_modify_prot_commit;
1298 }
1299
1185 machine_ops = xen_machine_ops; 1300 machine_ops = xen_machine_ops;
1186 1301
1187#ifdef CONFIG_SMP 1302#ifdef CONFIG_SMP
1188 smp_ops = xen_smp_ops; 1303 smp_ops = xen_smp_ops;
1189#endif 1304#endif
1190 1305
1191 xen_setup_features();
1192
1193 /* Get mfn list */ 1306 /* Get mfn list */
1194 if (!xen_feature(XENFEAT_auto_translated_physmap)) 1307 if (!xen_feature(XENFEAT_auto_translated_physmap))
1195 phys_to_machine_mapping = (unsigned long *)xen_start_info->mfn_list; 1308 xen_build_dynamic_phys_to_machine();
1196 1309
1197 pgd = (pgd_t *)xen_start_info->pt_base; 1310 pgd = (pgd_t *)xen_start_info->pt_base;
1198 1311
1312 init_pg_tables_start = __pa(pgd);
1199 init_pg_tables_end = __pa(pgd) + xen_start_info->nr_pt_frames*PAGE_SIZE; 1313 init_pg_tables_end = __pa(pgd) + xen_start_info->nr_pt_frames*PAGE_SIZE;
1314 max_pfn_mapped = (init_pg_tables_end + 512*1024) >> PAGE_SHIFT;
1200 1315
1201 init_mm.pgd = pgd; /* use the Xen pagetables to start */ 1316 init_mm.pgd = pgd; /* use the Xen pagetables to start */
1202 1317
@@ -1232,9 +1347,12 @@ asmlinkage void __init xen_start_kernel(void)
1232 ? __pa(xen_start_info->mod_start) : 0; 1347 ? __pa(xen_start_info->mod_start) : 0;
1233 boot_params.hdr.ramdisk_size = xen_start_info->mod_len; 1348 boot_params.hdr.ramdisk_size = xen_start_info->mod_len;
1234 1349
1235 if (!is_initial_xendomain()) 1350 if (!is_initial_xendomain()) {
1351 add_preferred_console("xenboot", 0, NULL);
1352 add_preferred_console("tty", 0, NULL);
1236 add_preferred_console("hvc", 0, NULL); 1353 add_preferred_console("hvc", 0, NULL);
1354 }
1237 1355
1238 /* Start the world */ 1356 /* Start the world */
1239 start_kernel(); 1357 i386_start_kernel();
1240} 1358}
diff --git a/arch/x86/xen/manage.c b/arch/x86/xen/manage.c
deleted file mode 100644
index aa7af9e6abc0..000000000000
--- a/arch/x86/xen/manage.c
+++ /dev/null
@@ -1,143 +0,0 @@
1/*
2 * Handle extern requests for shutdown, reboot and sysrq
3 */
4#include <linux/kernel.h>
5#include <linux/err.h>
6#include <linux/reboot.h>
7#include <linux/sysrq.h>
8
9#include <xen/xenbus.h>
10
11#define SHUTDOWN_INVALID -1
12#define SHUTDOWN_POWEROFF 0
13#define SHUTDOWN_SUSPEND 2
14/* Code 3 is SHUTDOWN_CRASH, which we don't use because the domain can only
15 * report a crash, not be instructed to crash!
16 * HALT is the same as POWEROFF, as far as we're concerned. The tools use
17 * the distinction when we return the reason code to them.
18 */
19#define SHUTDOWN_HALT 4
20
21/* Ignore multiple shutdown requests. */
22static int shutting_down = SHUTDOWN_INVALID;
23
24static void shutdown_handler(struct xenbus_watch *watch,
25 const char **vec, unsigned int len)
26{
27 char *str;
28 struct xenbus_transaction xbt;
29 int err;
30
31 if (shutting_down != SHUTDOWN_INVALID)
32 return;
33
34 again:
35 err = xenbus_transaction_start(&xbt);
36 if (err)
37 return;
38
39 str = (char *)xenbus_read(xbt, "control", "shutdown", NULL);
40 /* Ignore read errors and empty reads. */
41 if (XENBUS_IS_ERR_READ(str)) {
42 xenbus_transaction_end(xbt, 1);
43 return;
44 }
45
46 xenbus_write(xbt, "control", "shutdown", "");
47
48 err = xenbus_transaction_end(xbt, 0);
49 if (err == -EAGAIN) {
50 kfree(str);
51 goto again;
52 }
53
54 if (strcmp(str, "poweroff") == 0 ||
55 strcmp(str, "halt") == 0)
56 orderly_poweroff(false);
57 else if (strcmp(str, "reboot") == 0)
58 ctrl_alt_del();
59 else {
60 printk(KERN_INFO "Ignoring shutdown request: %s\n", str);
61 shutting_down = SHUTDOWN_INVALID;
62 }
63
64 kfree(str);
65}
66
67static void sysrq_handler(struct xenbus_watch *watch, const char **vec,
68 unsigned int len)
69{
70 char sysrq_key = '\0';
71 struct xenbus_transaction xbt;
72 int err;
73
74 again:
75 err = xenbus_transaction_start(&xbt);
76 if (err)
77 return;
78 if (!xenbus_scanf(xbt, "control", "sysrq", "%c", &sysrq_key)) {
79 printk(KERN_ERR "Unable to read sysrq code in "
80 "control/sysrq\n");
81 xenbus_transaction_end(xbt, 1);
82 return;
83 }
84
85 if (sysrq_key != '\0')
86 xenbus_printf(xbt, "control", "sysrq", "%c", '\0');
87
88 err = xenbus_transaction_end(xbt, 0);
89 if (err == -EAGAIN)
90 goto again;
91
92 if (sysrq_key != '\0')
93 handle_sysrq(sysrq_key, NULL);
94}
95
96static struct xenbus_watch shutdown_watch = {
97 .node = "control/shutdown",
98 .callback = shutdown_handler
99};
100
101static struct xenbus_watch sysrq_watch = {
102 .node = "control/sysrq",
103 .callback = sysrq_handler
104};
105
106static int setup_shutdown_watcher(void)
107{
108 int err;
109
110 err = register_xenbus_watch(&shutdown_watch);
111 if (err) {
112 printk(KERN_ERR "Failed to set shutdown watcher\n");
113 return err;
114 }
115
116 err = register_xenbus_watch(&sysrq_watch);
117 if (err) {
118 printk(KERN_ERR "Failed to set sysrq watcher\n");
119 return err;
120 }
121
122 return 0;
123}
124
125static int shutdown_event(struct notifier_block *notifier,
126 unsigned long event,
127 void *data)
128{
129 setup_shutdown_watcher();
130 return NOTIFY_DONE;
131}
132
133static int __init setup_shutdown_event(void)
134{
135 static struct notifier_block xenstore_notifier = {
136 .notifier_call = shutdown_event
137 };
138 register_xenstore_notifier(&xenstore_notifier);
139
140 return 0;
141}
142
143subsys_initcall(setup_shutdown_event);
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 4e527e7893a8..42b3b9ed641d 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -56,6 +56,131 @@
56#include "multicalls.h" 56#include "multicalls.h"
57#include "mmu.h" 57#include "mmu.h"
58 58
59#define P2M_ENTRIES_PER_PAGE (PAGE_SIZE / sizeof(unsigned long))
60#define TOP_ENTRIES (MAX_DOMAIN_PAGES / P2M_ENTRIES_PER_PAGE)
61
62/* Placeholder for holes in the address space */
63static unsigned long p2m_missing[P2M_ENTRIES_PER_PAGE]
64 __attribute__((section(".data.page_aligned"))) =
65 { [ 0 ... P2M_ENTRIES_PER_PAGE-1 ] = ~0UL };
66
67 /* Array of pointers to pages containing p2m entries */
68static unsigned long *p2m_top[TOP_ENTRIES]
69 __attribute__((section(".data.page_aligned"))) =
70 { [ 0 ... TOP_ENTRIES - 1] = &p2m_missing[0] };
71
72/* Arrays of p2m arrays expressed in mfns used for save/restore */
73static unsigned long p2m_top_mfn[TOP_ENTRIES]
74 __attribute__((section(".bss.page_aligned")));
75
76static unsigned long p2m_top_mfn_list[
77 PAGE_ALIGN(TOP_ENTRIES / P2M_ENTRIES_PER_PAGE)]
78 __attribute__((section(".bss.page_aligned")));
79
80static inline unsigned p2m_top_index(unsigned long pfn)
81{
82 BUG_ON(pfn >= MAX_DOMAIN_PAGES);
83 return pfn / P2M_ENTRIES_PER_PAGE;
84}
85
86static inline unsigned p2m_index(unsigned long pfn)
87{
88 return pfn % P2M_ENTRIES_PER_PAGE;
89}
90
91/* Build the parallel p2m_top_mfn structures */
92void xen_setup_mfn_list_list(void)
93{
94 unsigned pfn, idx;
95
96 for(pfn = 0; pfn < MAX_DOMAIN_PAGES; pfn += P2M_ENTRIES_PER_PAGE) {
97 unsigned topidx = p2m_top_index(pfn);
98
99 p2m_top_mfn[topidx] = virt_to_mfn(p2m_top[topidx]);
100 }
101
102 for(idx = 0; idx < ARRAY_SIZE(p2m_top_mfn_list); idx++) {
103 unsigned topidx = idx * P2M_ENTRIES_PER_PAGE;
104 p2m_top_mfn_list[idx] = virt_to_mfn(&p2m_top_mfn[topidx]);
105 }
106
107 BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info);
108
109 HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
110 virt_to_mfn(p2m_top_mfn_list);
111 HYPERVISOR_shared_info->arch.max_pfn = xen_start_info->nr_pages;
112}
113
114/* Set up p2m_top to point to the domain-builder provided p2m pages */
115void __init xen_build_dynamic_phys_to_machine(void)
116{
117 unsigned long *mfn_list = (unsigned long *)xen_start_info->mfn_list;
118 unsigned long max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages);
119 unsigned pfn;
120
121 for(pfn = 0; pfn < max_pfn; pfn += P2M_ENTRIES_PER_PAGE) {
122 unsigned topidx = p2m_top_index(pfn);
123
124 p2m_top[topidx] = &mfn_list[pfn];
125 }
126}
127
128unsigned long get_phys_to_machine(unsigned long pfn)
129{
130 unsigned topidx, idx;
131
132 if (unlikely(pfn >= MAX_DOMAIN_PAGES))
133 return INVALID_P2M_ENTRY;
134
135 topidx = p2m_top_index(pfn);
136 idx = p2m_index(pfn);
137 return p2m_top[topidx][idx];
138}
139EXPORT_SYMBOL_GPL(get_phys_to_machine);
140
141static void alloc_p2m(unsigned long **pp, unsigned long *mfnp)
142{
143 unsigned long *p;
144 unsigned i;
145
146 p = (void *)__get_free_page(GFP_KERNEL | __GFP_NOFAIL);
147 BUG_ON(p == NULL);
148
149 for(i = 0; i < P2M_ENTRIES_PER_PAGE; i++)
150 p[i] = INVALID_P2M_ENTRY;
151
152 if (cmpxchg(pp, p2m_missing, p) != p2m_missing)
153 free_page((unsigned long)p);
154 else
155 *mfnp = virt_to_mfn(p);
156}
157
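alloc_p2m() above publishes its leaf page lock-free: two CPUs can both find the shared p2m_missing placeholder, but cmpxchg() lets only one of them install its page, and the loser frees its copy. The same pattern in isolation (names hypothetical):

	static void publish_page(unsigned long **slot, unsigned long *placeholder,
				 unsigned long *newp)
	{
		/* install newp only if the slot still holds the placeholder */
		if (cmpxchg(slot, placeholder, newp) != placeholder)
			free_page((unsigned long)newp);	/* lost the race */
	}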
158void set_phys_to_machine(unsigned long pfn, unsigned long mfn)
159{
160 unsigned topidx, idx;
161
162 if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) {
163 BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY);
164 return;
165 }
166
167 if (unlikely(pfn >= MAX_DOMAIN_PAGES)) {
168 BUG_ON(mfn != INVALID_P2M_ENTRY);
169 return;
170 }
171
172 topidx = p2m_top_index(pfn);
173 if (p2m_top[topidx] == p2m_missing) {
174 /* no need to allocate a page to store an invalid entry */
175 if (mfn == INVALID_P2M_ENTRY)
176 return;
177 alloc_p2m(&p2m_top[topidx], &p2m_top_mfn[topidx]);
178 }
179
180 idx = p2m_index(pfn);
181 p2m_top[topidx][idx] = mfn;
182}
183
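The two-level split used by these accessors: a pfn selects the top-level slot with pfn / P2M_ENTRIES_PER_PAGE and the entry within the leaf page with the remainder. A worked example, assuming the 32-bit value of 1024 entries per page:

	/* pfn = 0x12345:
	 *   topidx = 0x12345 / 1024 = 0x48   (which p2m page)
	 *   idx    = 0x12345 % 1024 = 0x345  (slot within it)
	 * so mfn = p2m_top[0x48][0x345]; unpopulated ranges all point at
	 * the shared p2m_missing page, whose ~0UL entries read back as
	 * INVALID_P2M_ENTRY.
	 */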
59xmaddr_t arbitrary_virt_to_machine(unsigned long address) 184xmaddr_t arbitrary_virt_to_machine(unsigned long address)
60{ 185{
61 unsigned int level; 186 unsigned int level;
@@ -98,24 +223,60 @@ void make_lowmem_page_readwrite(void *vaddr)
98} 223}
99 224
100 225
101void xen_set_pmd(pmd_t *ptr, pmd_t val) 226static bool page_pinned(void *ptr)
227{
228 struct page *page = virt_to_page(ptr);
229
230 return PagePinned(page);
231}
232
233static void extend_mmu_update(const struct mmu_update *update)
102{ 234{
103 struct multicall_space mcs; 235 struct multicall_space mcs;
104 struct mmu_update *u; 236 struct mmu_update *u;
105 237
106 preempt_disable(); 238 mcs = xen_mc_extend_args(__HYPERVISOR_mmu_update, sizeof(*u));
239
240 if (mcs.mc != NULL)
241 mcs.mc->args[1]++;
242 else {
243 mcs = __xen_mc_entry(sizeof(*u));
244 MULTI_mmu_update(mcs.mc, mcs.args, 1, NULL, DOMID_SELF);
245 }
107 246
108 mcs = xen_mc_entry(sizeof(*u));
109 u = mcs.args; 247 u = mcs.args;
110 u->ptr = virt_to_machine(ptr).maddr; 248 *u = *update;
111 u->val = pmd_val_ma(val); 249}
112 MULTI_mmu_update(mcs.mc, u, 1, NULL, DOMID_SELF); 250
251void xen_set_pmd_hyper(pmd_t *ptr, pmd_t val)
252{
253 struct mmu_update u;
254
255 preempt_disable();
256
257 xen_mc_batch();
258
259 u.ptr = virt_to_machine(ptr).maddr;
260 u.val = pmd_val_ma(val);
261 extend_mmu_update(&u);
113 262
114 xen_mc_issue(PARAVIRT_LAZY_MMU); 263 xen_mc_issue(PARAVIRT_LAZY_MMU);
115 264
116 preempt_enable(); 265 preempt_enable();
117} 266}
118 267
268void xen_set_pmd(pmd_t *ptr, pmd_t val)
269{
270 /* If page is not pinned, we can just update the entry
271 directly */
272 if (!page_pinned(ptr)) {
273 *ptr = val;
274 return;
275 }
276
277 xen_set_pmd_hyper(ptr, val);
278}
279
119/* 280/*
120 * Associate a virtual page frame with a given physical page frame 281 * Associate a virtual page frame with a given physical page frame
121 * and protection flags for that frame. 282 * and protection flags for that frame.
@@ -179,6 +340,26 @@ out:
179 preempt_enable(); 340 preempt_enable();
180} 341}
181 342
343pte_t xen_ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
344{
345 /* Just return the pte as-is. We preserve the bits on commit */
346 return *ptep;
347}
348
349void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
350 pte_t *ptep, pte_t pte)
351{
352 struct mmu_update u;
353
354 xen_mc_batch();
355
356 u.ptr = virt_to_machine(ptep).maddr | MMU_PT_UPDATE_PRESERVE_AD;
357 u.val = pte_val_ma(pte);
358 extend_mmu_update(&u);
359
360 xen_mc_issue(PARAVIRT_LAZY_MMU);
361}
362
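The start/commit pair above exists so that accessed/dirty bits set by hardware between the read and the write are not lost: the commit goes through MMU_PT_UPDATE_PRESERVE_AD, which asks the hypervisor to keep any A/D bits that changed in the meantime. A small userspace model of those semantics (the _PAGE_* values are the x86 ones; everything else is illustrative, not the hypervisor's implementation):

	#include <stdio.h>

	#define _PAGE_ACCESSED	0x20	/* x86 PTE bit 5 */
	#define _PAGE_DIRTY	0x40	/* x86 PTE bit 6 */

	static unsigned long live_pte;	/* the pte as hardware sees it */

	static unsigned long modify_prot_start(void)
	{
		return live_pte;	/* snapshot; bits preserved on commit */
	}

	/* PRESERVE_AD semantics: install the new value, but keep any A/D
	 * bits the hardware set since the snapshot was taken. */
	static void modify_prot_commit(unsigned long newval)
	{
		live_pte = newval | (live_pte & (_PAGE_ACCESSED | _PAGE_DIRTY));
	}

	int main(void)
	{
		unsigned long pte;

		live_pte = 0x1000;
		pte = modify_prot_start();
		live_pte |= _PAGE_DIRTY;	  /* hardware write in the window */
		modify_prot_commit(pte & ~0x2UL); /* e.g. clear the RW bit */
		printf("dirty bit preserved: %s\n",
		       (live_pte & _PAGE_DIRTY) ? "yes" : "no");
		return 0;
	}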
182/* Assume pteval_t is equivalent to all the other *val_t types. */ 363/* Assume pteval_t is equivalent to all the other *val_t types. */
183static pteval_t pte_mfn_to_pfn(pteval_t val) 364static pteval_t pte_mfn_to_pfn(pteval_t val)
184{ 365{
@@ -229,24 +410,35 @@ pmdval_t xen_pmd_val(pmd_t pmd)
229 return pte_mfn_to_pfn(pmd.pmd); 410 return pte_mfn_to_pfn(pmd.pmd);
230} 411}
231 412
232void xen_set_pud(pud_t *ptr, pud_t val) 413void xen_set_pud_hyper(pud_t *ptr, pud_t val)
233{ 414{
234 struct multicall_space mcs; 415 struct mmu_update u;
235 struct mmu_update *u;
236 416
237 preempt_disable(); 417 preempt_disable();
238 418
239 mcs = xen_mc_entry(sizeof(*u)); 419 xen_mc_batch();
240 u = mcs.args; 420
241 u->ptr = virt_to_machine(ptr).maddr; 421 u.ptr = virt_to_machine(ptr).maddr;
242 u->val = pud_val_ma(val); 422 u.val = pud_val_ma(val);
243 MULTI_mmu_update(mcs.mc, u, 1, NULL, DOMID_SELF); 423 extend_mmu_update(&u);
244 424
245 xen_mc_issue(PARAVIRT_LAZY_MMU); 425 xen_mc_issue(PARAVIRT_LAZY_MMU);
246 426
247 preempt_enable(); 427 preempt_enable();
248} 428}
249 429
430void xen_set_pud(pud_t *ptr, pud_t val)
431{
432 /* If page is not pinned, we can just update the entry
433 directly */
434 if (!page_pinned(ptr)) {
435 *ptr = val;
436 return;
437 }
438
439 xen_set_pud_hyper(ptr, val);
440}
441
250void xen_set_pte(pte_t *ptep, pte_t pte) 442void xen_set_pte(pte_t *ptep, pte_t pte)
251{ 443{
252 ptep->pte_high = pte.pte_high; 444 ptep->pte_high = pte.pte_high;
@@ -268,7 +460,7 @@ void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
268 460
269void xen_pmd_clear(pmd_t *pmdp) 461void xen_pmd_clear(pmd_t *pmdp)
270{ 462{
271 xen_set_pmd(pmdp, __pmd(0)); 463 set_pmd(pmdp, __pmd(0));
272} 464}
273 465
274pmd_t xen_make_pmd(pmdval_t pmd) 466pmd_t xen_make_pmd(pmdval_t pmd)
@@ -441,6 +633,29 @@ void xen_pgd_pin(pgd_t *pgd)
441 xen_mc_issue(0); 633 xen_mc_issue(0);
442} 634}
443 635
636/*
637 * On save, we need to pin all pagetables to make sure they get their
638 * mfns turned into pfns. Search the list for any unpinned pgds and pin
639 * them (unpinned pgds are not currently in use, probably because the
640 * process is under construction or destruction).
641 */
642void xen_mm_pin_all(void)
643{
644 unsigned long flags;
645 struct page *page;
646
647 spin_lock_irqsave(&pgd_lock, flags);
648
649 list_for_each_entry(page, &pgd_list, lru) {
650 if (!PagePinned(page)) {
651 xen_pgd_pin((pgd_t *)page_address(page));
652 SetPageSavePinned(page);
653 }
654 }
655
656 spin_unlock_irqrestore(&pgd_lock, flags);
657}
658
 444/* The init_mm pagetable is really pinned as soon as it's created, but 659
445 that's before we have page structures to store the bits. So do all 660 that's before we have page structures to store the bits. So do all
446 the book-keeping now. */ 661 the book-keeping now. */
@@ -498,6 +713,29 @@ static void xen_pgd_unpin(pgd_t *pgd)
498 xen_mc_issue(0); 713 xen_mc_issue(0);
499} 714}
500 715
716/*
717 * On resume, undo any pinning done at save, so that the rest of the
718 * kernel doesn't see any unexpected pinned pagetables.
719 */
720void xen_mm_unpin_all(void)
721{
722 unsigned long flags;
723 struct page *page;
724
725 spin_lock_irqsave(&pgd_lock, flags);
726
727 list_for_each_entry(page, &pgd_list, lru) {
728 if (PageSavePinned(page)) {
729 BUG_ON(!PagePinned(page));
 730		printk(KERN_DEBUG "unpinning pinned %p\n", page_address(page));
731 xen_pgd_unpin((pgd_t *)page_address(page));
732 ClearPageSavePinned(page);
733 }
734 }
735
736 spin_unlock_irqrestore(&pgd_lock, flags);
737}
738
501void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next) 739void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next)
502{ 740{
503 spin_lock(&next->page_table_lock); 741 spin_lock(&next->page_table_lock);
@@ -591,7 +829,7 @@ void xen_exit_mmap(struct mm_struct *mm)
591 spin_lock(&mm->page_table_lock); 829 spin_lock(&mm->page_table_lock);
592 830
593 /* pgd may not be pinned in the error exit path of execve */ 831 /* pgd may not be pinned in the error exit path of execve */
594 if (PagePinned(virt_to_page(mm->pgd))) 832 if (page_pinned(mm->pgd))
595 xen_pgd_unpin(mm->pgd); 833 xen_pgd_unpin(mm->pgd);
596 834
597 spin_unlock(&mm->page_table_lock); 835 spin_unlock(&mm->page_table_lock);
diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h
index 5fe961caffd4..297bf9f5b8bc 100644
--- a/arch/x86/xen/mmu.h
+++ b/arch/x86/xen/mmu.h
@@ -25,10 +25,6 @@ enum pt_level {
25 25
26void set_pte_mfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags); 26void set_pte_mfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags);
27 27
28void xen_set_pte(pte_t *ptep, pte_t pteval);
29void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
30 pte_t *ptep, pte_t pteval);
31void xen_set_pmd(pmd_t *pmdp, pmd_t pmdval);
32 28
33void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next); 29void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next);
34void xen_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm); 30void xen_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm);
@@ -45,11 +41,19 @@ pte_t xen_make_pte(pteval_t);
45pmd_t xen_make_pmd(pmdval_t); 41pmd_t xen_make_pmd(pmdval_t);
46pgd_t xen_make_pgd(pgdval_t); 42pgd_t xen_make_pgd(pgdval_t);
47 43
44void xen_set_pte(pte_t *ptep, pte_t pteval);
48void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, 45void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
49 pte_t *ptep, pte_t pteval); 46 pte_t *ptep, pte_t pteval);
50void xen_set_pte_atomic(pte_t *ptep, pte_t pte); 47void xen_set_pte_atomic(pte_t *ptep, pte_t pte);
48void xen_set_pmd(pmd_t *pmdp, pmd_t pmdval);
51void xen_set_pud(pud_t *ptr, pud_t val); 49void xen_set_pud(pud_t *ptr, pud_t val);
50void xen_set_pmd_hyper(pmd_t *pmdp, pmd_t pmdval);
51void xen_set_pud_hyper(pud_t *ptr, pud_t val);
52void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep); 52void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
53void xen_pmd_clear(pmd_t *pmdp); 53void xen_pmd_clear(pmd_t *pmdp);
54 54
55pte_t xen_ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
56void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
57 pte_t *ptep, pte_t pte);
58
55#endif /* _XEN_MMU_H */ 59#endif /* _XEN_MMU_H */
diff --git a/arch/x86/xen/multicalls.c b/arch/x86/xen/multicalls.c
index 5791eb2e3750..3c63c4da7ed1 100644
--- a/arch/x86/xen/multicalls.c
+++ b/arch/x86/xen/multicalls.c
@@ -29,14 +29,14 @@
29#define MC_DEBUG 1 29#define MC_DEBUG 1
30 30
31#define MC_BATCH 32 31#define MC_BATCH 32
32#define MC_ARGS (MC_BATCH * 16 / sizeof(u64)) 32#define MC_ARGS (MC_BATCH * 16)
33 33
34struct mc_buffer { 34struct mc_buffer {
35 struct multicall_entry entries[MC_BATCH]; 35 struct multicall_entry entries[MC_BATCH];
36#if MC_DEBUG 36#if MC_DEBUG
37 struct multicall_entry debug[MC_BATCH]; 37 struct multicall_entry debug[MC_BATCH];
38#endif 38#endif
39 u64 args[MC_ARGS]; 39 unsigned char args[MC_ARGS];
40 struct callback { 40 struct callback {
41 void (*fn)(void *); 41 void (*fn)(void *);
42 void *data; 42 void *data;
@@ -107,20 +107,48 @@ struct multicall_space __xen_mc_entry(size_t args)
107{ 107{
108 struct mc_buffer *b = &__get_cpu_var(mc_buffer); 108 struct mc_buffer *b = &__get_cpu_var(mc_buffer);
109 struct multicall_space ret; 109 struct multicall_space ret;
110 unsigned argspace = (args + sizeof(u64) - 1) / sizeof(u64); 110 unsigned argidx = roundup(b->argidx, sizeof(u64));
111 111
112 BUG_ON(preemptible()); 112 BUG_ON(preemptible());
113 BUG_ON(argspace > MC_ARGS); 113 BUG_ON(b->argidx > MC_ARGS);
114 114
115 if (b->mcidx == MC_BATCH || 115 if (b->mcidx == MC_BATCH ||
116 (b->argidx + argspace) > MC_ARGS) 116 (argidx + args) > MC_ARGS) {
117 xen_mc_flush(); 117 xen_mc_flush();
118 argidx = roundup(b->argidx, sizeof(u64));
119 }
118 120
119 ret.mc = &b->entries[b->mcidx]; 121 ret.mc = &b->entries[b->mcidx];
120 b->mcidx++; 122 b->mcidx++;
123 ret.args = &b->args[argidx];
124 b->argidx = argidx + args;
125
126 BUG_ON(b->argidx > MC_ARGS);
127 return ret;
128}
129
130struct multicall_space xen_mc_extend_args(unsigned long op, size_t size)
131{
132 struct mc_buffer *b = &__get_cpu_var(mc_buffer);
133 struct multicall_space ret = { NULL, NULL };
134
135 BUG_ON(preemptible());
136 BUG_ON(b->argidx > MC_ARGS);
137
138 if (b->mcidx == 0)
139 return ret;
140
141 if (b->entries[b->mcidx - 1].op != op)
142 return ret;
143
144 if ((b->argidx + size) > MC_ARGS)
145 return ret;
146
147 ret.mc = &b->entries[b->mcidx - 1];
121 ret.args = &b->args[b->argidx]; 148 ret.args = &b->args[b->argidx];
122 b->argidx += argspace; 149 b->argidx += size;
123 150
151 BUG_ON(b->argidx > MC_ARGS);
124 return ret; 152 return ret;
125} 153}
126 154
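With args now a byte array, __xen_mc_entry() above rounds the allocation cursor up to a u64 boundary before carving out space, so each new command's arguments start aligned while xen_mc_extend_args() can still append unaligned bytes at argidx. A runnable sketch of that arithmetic, using the same roundup() definition as the kernel:

	#include <stdio.h>

	#define roundup(x, y) ((((x) + ((y) - 1)) / (y)) * (y))

	int main(void)
	{
		unsigned argidx = 0;
		size_t sizes[] = { 12, 16, 3 };	/* arbitrary argument sizes */
		unsigned i;

		for (i = 0; i < 3; i++) {
			unsigned start = roundup(argidx, sizeof(unsigned long long));

			printf("alloc %zu bytes at offset %u\n", sizes[i], start);
			argidx = start + sizes[i];
		}
		/* prints offsets 0, 16, 32 -- each new entry starts u64-aligned */
		return 0;
	}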
diff --git a/arch/x86/xen/multicalls.h b/arch/x86/xen/multicalls.h
index 8bae996d99a3..858938241616 100644
--- a/arch/x86/xen/multicalls.h
+++ b/arch/x86/xen/multicalls.h
@@ -45,4 +45,16 @@ static inline void xen_mc_issue(unsigned mode)
45/* Set up a callback to be called when the current batch is flushed */ 45/* Set up a callback to be called when the current batch is flushed */
46void xen_mc_callback(void (*fn)(void *), void *data); 46void xen_mc_callback(void (*fn)(void *), void *data);
47 47
48/*
 49 * Try to extend the arguments of the previous multicall command: if the
 50 * previous command's op matches, the argument space allocated to that
 51 * multicall entry is grown by arg_size bytes.
 52 *
 53 * On success, the returned multicall_space has mc pointing to the
 54 * command and args pointing to the newly allocated space; on failure,
 55 * mc is NULL and the caller must allocate a new multicall entry
 56 * instead.
57 */
58struct multicall_space xen_mc_extend_args(unsigned long op, size_t arg_size);
59
48#endif /* _XEN_MULTICALLS_H */ 60#endif /* _XEN_MULTICALLS_H */
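The intended calling pattern is the one extend_mmu_update() in the mmu.c hunk above follows: try to piggyback on the previous command, and fall back to a fresh entry if that fails. In outline (taken from that caller, not additional API):

	mcs = xen_mc_extend_args(__HYPERVISOR_mmu_update, sizeof(*u));

	if (mcs.mc != NULL)
		mcs.mc->args[1]++;	/* bump the command's update count */
	else {
		mcs = __xen_mc_entry(sizeof(*u));
		MULTI_mmu_update(mcs.mc, mcs.args, 1, NULL, DOMID_SELF);
	}

	u = mcs.args;
	*u = *update;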
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 82517e4a752a..e0a39595bde3 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -13,9 +13,11 @@
13#include <asm/vdso.h> 13#include <asm/vdso.h>
14#include <asm/e820.h> 14#include <asm/e820.h>
15#include <asm/setup.h> 15#include <asm/setup.h>
16#include <asm/acpi.h>
16#include <asm/xen/hypervisor.h> 17#include <asm/xen/hypervisor.h>
17#include <asm/xen/hypercall.h> 18#include <asm/xen/hypercall.h>
18 19
20#include <xen/page.h>
19#include <xen/interface/callback.h> 21#include <xen/interface/callback.h>
20#include <xen/interface/physdev.h> 22#include <xen/interface/physdev.h>
21#include <xen/features.h> 23#include <xen/features.h>
@@ -27,8 +29,6 @@
27extern const char xen_hypervisor_callback[]; 29extern const char xen_hypervisor_callback[];
28extern const char xen_failsafe_callback[]; 30extern const char xen_failsafe_callback[];
29 31
30unsigned long *phys_to_machine_mapping;
31EXPORT_SYMBOL(phys_to_machine_mapping);
32 32
33/** 33/**
34 * machine_specific_memory_setup - Hook for machine specific memory setup. 34 * machine_specific_memory_setup - Hook for machine specific memory setup.
@@ -38,9 +38,31 @@ char * __init xen_memory_setup(void)
38{ 38{
39 unsigned long max_pfn = xen_start_info->nr_pages; 39 unsigned long max_pfn = xen_start_info->nr_pages;
40 40
41 max_pfn = min(MAX_DOMAIN_PAGES, max_pfn);
42
41 e820.nr_map = 0; 43 e820.nr_map = 0;
42 add_memory_region(0, LOWMEMSIZE(), E820_RAM); 44
43 add_memory_region(HIGH_MEMORY, PFN_PHYS(max_pfn)-HIGH_MEMORY, E820_RAM); 45 e820_add_region(0, PFN_PHYS(max_pfn), E820_RAM);
46
47 /*
48 * Even though this is normal, usable memory under Xen, reserve
49 * ISA memory anyway because too many things think they can poke
50 * about in there.
51 */
52 e820_add_region(ISA_START_ADDRESS, ISA_END_ADDRESS - ISA_START_ADDRESS,
53 E820_RESERVED);
54
55 /*
56 * Reserve Xen bits:
57 * - mfn_list
58 * - xen_start_info
59 * See comment above "struct start_info" in <xen/interface/xen.h>
60 */
61 e820_add_region(__pa(xen_start_info->mfn_list),
62 xen_start_info->pt_base - xen_start_info->mfn_list,
63 E820_RESERVED);
64
65 sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
44 66
45 return "Xen"; 67 return "Xen";
46} 68}
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 94e69000f982..d2e3c20127d7 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -35,7 +35,7 @@
35#include "xen-ops.h" 35#include "xen-ops.h"
36#include "mmu.h" 36#include "mmu.h"
37 37
38static cpumask_t xen_cpu_initialized_map; 38cpumask_t xen_cpu_initialized_map;
39static DEFINE_PER_CPU(int, resched_irq) = -1; 39static DEFINE_PER_CPU(int, resched_irq) = -1;
40static DEFINE_PER_CPU(int, callfunc_irq) = -1; 40static DEFINE_PER_CPU(int, callfunc_irq) = -1;
41static DEFINE_PER_CPU(int, debug_irq) = -1; 41static DEFINE_PER_CPU(int, debug_irq) = -1;
@@ -65,6 +65,12 @@ static struct call_data_struct *call_data;
65 */ 65 */
66static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id) 66static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id)
67{ 67{
68#ifdef CONFIG_X86_32
69 __get_cpu_var(irq_stat).irq_resched_count++;
70#else
71 add_pda(irq_resched_count, 1);
72#endif
73
68 return IRQ_HANDLED; 74 return IRQ_HANDLED;
69} 75}
70 76
diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c
new file mode 100644
index 000000000000..251669a932d4
--- /dev/null
+++ b/arch/x86/xen/suspend.c
@@ -0,0 +1,45 @@
1#include <linux/types.h>
2
3#include <xen/interface/xen.h>
4#include <xen/grant_table.h>
5#include <xen/events.h>
6
7#include <asm/xen/hypercall.h>
8#include <asm/xen/page.h>
9
10#include "xen-ops.h"
11#include "mmu.h"
12
13void xen_pre_suspend(void)
14{
15 xen_start_info->store_mfn = mfn_to_pfn(xen_start_info->store_mfn);
16 xen_start_info->console.domU.mfn =
17 mfn_to_pfn(xen_start_info->console.domU.mfn);
18
19 BUG_ON(!irqs_disabled());
20
21 HYPERVISOR_shared_info = &xen_dummy_shared_info;
22 if (HYPERVISOR_update_va_mapping(fix_to_virt(FIX_PARAVIRT_BOOTMAP),
23 __pte_ma(0), 0))
24 BUG();
25}
26
27void xen_post_suspend(int suspend_cancelled)
28{
29 xen_setup_shared_info();
30
31 if (suspend_cancelled) {
32 xen_start_info->store_mfn =
33 pfn_to_mfn(xen_start_info->store_mfn);
34 xen_start_info->console.domU.mfn =
35 pfn_to_mfn(xen_start_info->console.domU.mfn);
36 } else {
37#ifdef CONFIG_SMP
38 xen_cpu_initialized_map = cpu_online_map;
39#endif
40 xen_vcpu_restore();
41 xen_timer_resume();
42 }
43
44}
45
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index 41e217503c96..685b77470fc3 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -197,8 +197,8 @@ unsigned long long xen_sched_clock(void)
197} 197}
198 198
199 199
200/* Get the CPU speed from Xen */ 200/* Get the TSC speed from Xen */
201unsigned long xen_cpu_khz(void) 201unsigned long xen_tsc_khz(void)
202{ 202{
203 u64 xen_khz = 1000000ULL << 32; 203 u64 xen_khz = 1000000ULL << 32;
204 const struct pvclock_vcpu_time_info *info = 204 const struct pvclock_vcpu_time_info *info =
@@ -459,6 +459,19 @@ void xen_setup_cpu_clockevents(void)
459 clockevents_register_device(&__get_cpu_var(xen_clock_events)); 459 clockevents_register_device(&__get_cpu_var(xen_clock_events));
460} 460}
461 461
462void xen_timer_resume(void)
463{
464 int cpu;
465
466 if (xen_clockevent != &xen_vcpuop_clockevent)
467 return;
468
469 for_each_online_cpu(cpu) {
470 if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL))
471 BUG();
472 }
473}
474
462__init void xen_time_init(void) 475__init void xen_time_init(void)
463{ 476{
464 int cpu = smp_processor_id(); 477 int cpu = smp_processor_id();
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index 6ec3b4f7719b..7c0cf6320a0a 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -7,6 +7,7 @@
7#include <linux/init.h> 7#include <linux/init.h>
8#include <asm/boot.h> 8#include <asm/boot.h>
9#include <xen/interface/elfnote.h> 9#include <xen/interface/elfnote.h>
10#include <asm/xen/interface.h>
10 11
11 __INIT 12 __INIT
12ENTRY(startup_xen) 13ENTRY(startup_xen)
@@ -32,5 +33,9 @@ ENTRY(hypercall_page)
32 ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz "!writable_page_tables|pae_pgdir_above_4gb") 33 ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz "!writable_page_tables|pae_pgdir_above_4gb")
33 ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "yes") 34 ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "yes")
34 ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz "generic") 35 ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz "generic")
36 ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID,
37 .quad _PAGE_PRESENT; .quad _PAGE_PRESENT)
38 ELFNOTE(Xen, XEN_ELFNOTE_SUSPEND_CANCEL, .long 1)
39 ELFNOTE(Xen, XEN_ELFNOTE_HV_START_LOW, .long __HYPERVISOR_VIRT_START)
35 40
36#endif /*CONFIG_XEN */ 41#endif /*CONFIG_XEN */
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index f1063ae08037..d852ddbb3448 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -9,26 +9,35 @@
9extern const char xen_hypervisor_callback[]; 9extern const char xen_hypervisor_callback[];
10extern const char xen_failsafe_callback[]; 10extern const char xen_failsafe_callback[];
11 11
12struct trap_info;
12void xen_copy_trap_info(struct trap_info *traps); 13void xen_copy_trap_info(struct trap_info *traps);
13 14
14DECLARE_PER_CPU(unsigned long, xen_cr3); 15DECLARE_PER_CPU(unsigned long, xen_cr3);
15DECLARE_PER_CPU(unsigned long, xen_current_cr3); 16DECLARE_PER_CPU(unsigned long, xen_current_cr3);
16 17
17extern struct start_info *xen_start_info; 18extern struct start_info *xen_start_info;
19extern struct shared_info xen_dummy_shared_info;
18extern struct shared_info *HYPERVISOR_shared_info; 20extern struct shared_info *HYPERVISOR_shared_info;
19 21
22void xen_setup_mfn_list_list(void);
23void xen_setup_shared_info(void);
24
20char * __init xen_memory_setup(void); 25char * __init xen_memory_setup(void);
21void __init xen_arch_setup(void); 26void __init xen_arch_setup(void);
22void __init xen_init_IRQ(void); 27void __init xen_init_IRQ(void);
23void xen_enable_sysenter(void); 28void xen_enable_sysenter(void);
29void xen_vcpu_restore(void);
30
31void __init xen_build_dynamic_phys_to_machine(void);
24 32
25void xen_setup_timer(int cpu); 33void xen_setup_timer(int cpu);
26void xen_setup_cpu_clockevents(void); 34void xen_setup_cpu_clockevents(void);
27unsigned long xen_cpu_khz(void); 35unsigned long xen_tsc_khz(void);
28void __init xen_time_init(void); 36void __init xen_time_init(void);
29unsigned long xen_get_wallclock(void); 37unsigned long xen_get_wallclock(void);
30int xen_set_wallclock(unsigned long time); 38int xen_set_wallclock(unsigned long time);
31unsigned long long xen_sched_clock(void); 39unsigned long long xen_sched_clock(void);
40void xen_timer_resume(void);
32 41
33irqreturn_t xen_debug_interrupt(int irq, void *dev_id); 42irqreturn_t xen_debug_interrupt(int irq, void *dev_id);
34 43
@@ -54,6 +63,8 @@ int xen_smp_call_function_single(int cpu, void (*func) (void *info), void *info,
54int xen_smp_call_function_mask(cpumask_t mask, void (*func)(void *), 63int xen_smp_call_function_mask(cpumask_t mask, void (*func)(void *),
55 void *info, int wait); 64 void *info, int wait);
56 65
66extern cpumask_t xen_cpu_initialized_map;
67
57 68
58/* Declare an asm function, along with symbols needed to make it 69/* Declare an asm function, along with symbols needed to make it
59 inlineable */ 70 inlineable */
diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
index c52fca833268..bba867391a85 100644
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig
@@ -4,8 +4,6 @@
4 4
5menuconfig ACPI 5menuconfig ACPI
6 bool "ACPI (Advanced Configuration and Power Interface) Support" 6 bool "ACPI (Advanced Configuration and Power Interface) Support"
7 depends on !X86_NUMAQ
8 depends on !X86_VISWS
9 depends on !IA64_HP_SIM 7 depends on !IA64_HP_SIM
10 depends on IA64 || X86 8 depends on IA64 || X86
11 depends on PCI 9 depends on PCI
diff --git a/drivers/base/power/trace.c b/drivers/base/power/trace.c
index 2b4b392dcbc1..87a7f1d02578 100644
--- a/drivers/base/power/trace.c
+++ b/drivers/base/power/trace.c
@@ -153,7 +153,7 @@ EXPORT_SYMBOL(set_trace_device);
153 * it's not any guarantee, but it's a high _likelihood_ that 153 * it's not any guarantee, but it's a high _likelihood_ that
154 * the match is valid). 154 * the match is valid).
155 */ 155 */
156void generate_resume_trace(void *tracedata, unsigned int user) 156void generate_resume_trace(const void *tracedata, unsigned int user)
157{ 157{
158 unsigned short lineno = *(unsigned short *)tracedata; 158 unsigned short lineno = *(unsigned short *)tracedata;
159 const char *file = *(const char **)(tracedata + 2); 159 const char *file = *(const char **)(tracedata + 2);
diff --git a/drivers/base/topology.c b/drivers/base/topology.c
index fdf4044d2e74..1efe162e16d7 100644
--- a/drivers/base/topology.c
+++ b/drivers/base/topology.c
@@ -40,6 +40,7 @@ static ssize_t show_##name(struct sys_device *dev, char *buf) \
40 return sprintf(buf, "%d\n", topology_##name(cpu)); \ 40 return sprintf(buf, "%d\n", topology_##name(cpu)); \
41} 41}
42 42
43#if defined(topology_thread_siblings) || defined(topology_core_siblings)
43static ssize_t show_cpumap(int type, cpumask_t *mask, char *buf) 44static ssize_t show_cpumap(int type, cpumask_t *mask, char *buf)
44{ 45{
45 ptrdiff_t len = PTR_ALIGN(buf + PAGE_SIZE - 1, PAGE_SIZE) - buf; 46 ptrdiff_t len = PTR_ALIGN(buf + PAGE_SIZE - 1, PAGE_SIZE) - buf;
@@ -54,21 +55,41 @@ static ssize_t show_cpumap(int type, cpumask_t *mask, char *buf)
54 } 55 }
55 return n; 56 return n;
56} 57}
58#endif
57 59
60#ifdef arch_provides_topology_pointers
58#define define_siblings_show_map(name) \ 61#define define_siblings_show_map(name) \
59static inline ssize_t show_##name(struct sys_device *dev, char *buf) \ 62static ssize_t show_##name(struct sys_device *dev, char *buf) \
60{ \ 63{ \
61 unsigned int cpu = dev->id; \ 64 unsigned int cpu = dev->id; \
62 return show_cpumap(0, &(topology_##name(cpu)), buf); \ 65 return show_cpumap(0, &(topology_##name(cpu)), buf); \
63} 66}
64 67
65#define define_siblings_show_list(name) \ 68#define define_siblings_show_list(name) \
66static inline ssize_t show_##name##_list(struct sys_device *dev, char *buf) \ 69static ssize_t show_##name##_list(struct sys_device *dev, char *buf) \
67{ \ 70{ \
68 unsigned int cpu = dev->id; \ 71 unsigned int cpu = dev->id; \
69 return show_cpumap(1, &(topology_##name(cpu)), buf); \ 72 return show_cpumap(1, &(topology_##name(cpu)), buf); \
70} 73}
71 74
75#else
76#define define_siblings_show_map(name) \
77static ssize_t show_##name(struct sys_device *dev, char *buf) \
78{ \
79 unsigned int cpu = dev->id; \
80 cpumask_t mask = topology_##name(cpu); \
81 return show_cpumap(0, &mask, buf); \
82}
83
84#define define_siblings_show_list(name) \
85static ssize_t show_##name##_list(struct sys_device *dev, char *buf) \
86{ \
87 unsigned int cpu = dev->id; \
88 cpumask_t mask = topology_##name(cpu); \
89 return show_cpumap(1, &mask, buf); \
90}
91#endif
92
72#define define_siblings_show_func(name) \ 93#define define_siblings_show_func(name) \
73 define_siblings_show_map(name); define_siblings_show_list(name) 94 define_siblings_show_map(name); define_siblings_show_list(name)
74 95
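For reference, in the fallback branch the map variant expands mechanically to the following (illustrative expansion for name = core_siblings):

	static ssize_t show_core_siblings(struct sys_device *dev, char *buf)
	{
		unsigned int cpu = dev->id;
		cpumask_t mask = topology_core_siblings(cpu);	/* copy first */

		return show_cpumap(0, &mask, buf);
	}

The local copy is the point of the split: when arch_provides_topology_pointers is not defined, topology_core_siblings(cpu) yields a value rather than an lvalue, so it cannot be passed by address directly.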
diff --git a/drivers/char/agp/amd64-agp.c b/drivers/char/agp/amd64-agp.c
index 13665db363d6..481ffe87c716 100644
--- a/drivers/char/agp/amd64-agp.c
+++ b/drivers/char/agp/amd64-agp.c
@@ -16,28 +16,9 @@
16#include <asm/page.h> /* PAGE_SIZE */ 16#include <asm/page.h> /* PAGE_SIZE */
17#include <asm/e820.h> 17#include <asm/e820.h>
18#include <asm/k8.h> 18#include <asm/k8.h>
19#include <asm/gart.h>
19#include "agp.h" 20#include "agp.h"
20 21
21/* PTE bits. */
22#define GPTE_VALID 1
23#define GPTE_COHERENT 2
24
25/* Aperture control register bits. */
26#define GARTEN (1<<0)
27#define DISGARTCPU (1<<4)
28#define DISGARTIO (1<<5)
29
30/* GART cache control register bits. */
31#define INVGART (1<<0)
32#define GARTPTEERR (1<<1)
33
34/* K8 On-cpu GART registers */
35#define AMD64_GARTAPERTURECTL 0x90
36#define AMD64_GARTAPERTUREBASE 0x94
37#define AMD64_GARTTABLEBASE 0x98
38#define AMD64_GARTCACHECTL 0x9c
39#define AMD64_GARTEN (1<<0)
40
41/* NVIDIA K8 registers */ 22/* NVIDIA K8 registers */
42#define NVIDIA_X86_64_0_APBASE 0x10 23#define NVIDIA_X86_64_0_APBASE 0x10
43#define NVIDIA_X86_64_1_APBASE1 0x50 24#define NVIDIA_X86_64_1_APBASE1 0x50
@@ -165,29 +146,18 @@ static int amd64_fetch_size(void)
165 * In a multiprocessor x86-64 system, this function gets 146 * In a multiprocessor x86-64 system, this function gets
166 * called once for each CPU. 147 * called once for each CPU.
167 */ 148 */
168static u64 amd64_configure (struct pci_dev *hammer, u64 gatt_table) 149static u64 amd64_configure(struct pci_dev *hammer, u64 gatt_table)
169{ 150{
170 u64 aperturebase; 151 u64 aperturebase;
171 u32 tmp; 152 u32 tmp;
172 u64 addr, aper_base; 153 u64 aper_base;
173 154
174 /* Address to map to */ 155 /* Address to map to */
175 pci_read_config_dword (hammer, AMD64_GARTAPERTUREBASE, &tmp); 156 pci_read_config_dword(hammer, AMD64_GARTAPERTUREBASE, &tmp);
176 aperturebase = tmp << 25; 157 aperturebase = tmp << 25;
177 aper_base = (aperturebase & PCI_BASE_ADDRESS_MEM_MASK); 158 aper_base = (aperturebase & PCI_BASE_ADDRESS_MEM_MASK);
178 159
179 /* address of the mappings table */ 160 enable_gart_translation(hammer, gatt_table);
180 addr = (u64) gatt_table;
181 addr >>= 12;
182 tmp = (u32) addr<<4;
183 tmp &= ~0xf;
184 pci_write_config_dword (hammer, AMD64_GARTTABLEBASE, tmp);
185
186 /* Enable GART translation for this hammer. */
187 pci_read_config_dword(hammer, AMD64_GARTAPERTURECTL, &tmp);
188 tmp |= GARTEN;
189 tmp &= ~(DISGARTCPU | DISGARTIO);
190 pci_write_config_dword(hammer, AMD64_GARTAPERTURECTL, tmp);
191 161
192 return aper_base; 162 return aper_base;
193} 163}
@@ -226,9 +196,9 @@ static void amd64_cleanup(void)
226 for (i = 0; i < num_k8_northbridges; i++) { 196 for (i = 0; i < num_k8_northbridges; i++) {
227 struct pci_dev *dev = k8_northbridges[i]; 197 struct pci_dev *dev = k8_northbridges[i];
228 /* disable gart translation */ 198 /* disable gart translation */
229 pci_read_config_dword (dev, AMD64_GARTAPERTURECTL, &tmp); 199 pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &tmp);
230 tmp &= ~AMD64_GARTEN; 200 tmp &= ~AMD64_GARTEN;
231 pci_write_config_dword (dev, AMD64_GARTAPERTURECTL, tmp); 201 pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, tmp);
232 } 202 }
233} 203}
234 204
@@ -258,24 +228,10 @@ static const struct agp_bridge_driver amd_8151_driver = {
258}; 228};
259 229
260/* Some basic sanity checks for the aperture. */ 230/* Some basic sanity checks for the aperture. */
261static int __devinit aperture_valid(u64 aper, u32 size) 231static int __devinit agp_aperture_valid(u64 aper, u32 size)
262{ 232{
263 if (aper == 0) { 233 if (!aperture_valid(aper, size, 32*1024*1024))
264 printk(KERN_ERR PFX "No aperture\n");
265 return 0;
266 }
267 if (size < 32*1024*1024) {
268 printk(KERN_ERR PFX "Aperture too small (%d MB)\n", size>>20);
269 return 0;
270 }
271 if ((u64)aper + size > 0x100000000ULL) {
272 printk(KERN_ERR PFX "Aperture out of bounds\n");
273 return 0; 234 return 0;
274 }
275 if (e820_any_mapped(aper, aper + size, E820_RAM)) {
276 printk(KERN_ERR PFX "Aperture pointing to RAM\n");
277 return 0;
278 }
279 235
280 /* Request the Aperture. This catches cases when someone else 236 /* Request the Aperture. This catches cases when someone else
281 already put a mapping in there - happens with some very broken BIOS 237 already put a mapping in there - happens with some very broken BIOS
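Judging from the open-coded checks deleted above, the shared aperture_valid() helper (now in arch code, not shown in this diff) is assumed to cover at least the following; a sketch, not the arch implementation:

	static int aperture_valid_sketch(u64 aper, u32 size, u32 min_size)
	{
		if (aper == 0)
			return 0;		/* no aperture programmed */
		if (size < min_size)
			return 0;		/* too small to be useful */
		if (aper + size > 0x100000000ULL)
			return 0;		/* must sit below 4G */
		if (e820_any_mapped(aper, aper + size, E820_RAM))
			return 0;		/* must not overlap RAM */
		return 1;
	}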
@@ -308,11 +264,11 @@ static __devinit int fix_northbridge(struct pci_dev *nb, struct pci_dev *agp,
308 u32 nb_order, nb_base; 264 u32 nb_order, nb_base;
309 u16 apsize; 265 u16 apsize;
310 266
311 pci_read_config_dword(nb, 0x90, &nb_order); 267 pci_read_config_dword(nb, AMD64_GARTAPERTURECTL, &nb_order);
312 nb_order = (nb_order >> 1) & 7; 268 nb_order = (nb_order >> 1) & 7;
313 pci_read_config_dword(nb, 0x94, &nb_base); 269 pci_read_config_dword(nb, AMD64_GARTAPERTUREBASE, &nb_base);
314 nb_aper = nb_base << 25; 270 nb_aper = nb_base << 25;
315 if (aperture_valid(nb_aper, (32*1024*1024)<<nb_order)) { 271 if (agp_aperture_valid(nb_aper, (32*1024*1024)<<nb_order)) {
316 return 0; 272 return 0;
317 } 273 }
318 274
@@ -331,12 +287,23 @@ static __devinit int fix_northbridge(struct pci_dev *nb, struct pci_dev *agp,
331 pci_read_config_dword(agp, 0x10, &aper_low); 287 pci_read_config_dword(agp, 0x10, &aper_low);
332 pci_read_config_dword(agp, 0x14, &aper_hi); 288 pci_read_config_dword(agp, 0x14, &aper_hi);
333 aper = (aper_low & ~((1<<22)-1)) | ((u64)aper_hi << 32); 289 aper = (aper_low & ~((1<<22)-1)) | ((u64)aper_hi << 32);
290
291 /*
 292	 * On some broken chips APSIZE is 0. This means it wants 4G,
 293	 * so double-check the order and trust the AMD NB settings instead.
294 */
 295	if (order >= 0 && aper + (32ULL << (20 + order)) > 0x100000000ULL) {
 296		printk(KERN_INFO PFX "Aperture size %u MB is not right, using settings from NB\n",
297 32 << order);
298 order = nb_order;
299 }
300
334 printk(KERN_INFO PFX "Aperture from AGP @ %Lx size %u MB\n", aper, 32 << order); 301 printk(KERN_INFO PFX "Aperture from AGP @ %Lx size %u MB\n", aper, 32 << order);
335 if (order < 0 || !aperture_valid(aper, (32*1024*1024)<<order)) 302 if (order < 0 || !agp_aperture_valid(aper, (32*1024*1024)<<order))
336 return -1; 303 return -1;
337 304
338 pci_write_config_dword(nb, 0x90, order << 1); 305 pci_write_config_dword(nb, AMD64_GARTAPERTURECTL, order << 1);
339 pci_write_config_dword(nb, 0x94, aper >> 25); 306 pci_write_config_dword(nb, AMD64_GARTAPERTUREBASE, aper >> 25);
340 307
341 return 0; 308 return 0;
342} 309}
diff --git a/drivers/char/hvc_xen.c b/drivers/char/hvc_xen.c
index dd68f8541c2d..db2ae4216279 100644
--- a/drivers/char/hvc_xen.c
+++ b/drivers/char/hvc_xen.c
@@ -39,9 +39,14 @@ static int xencons_irq;
39 39
40/* ------------------------------------------------------------------ */ 40/* ------------------------------------------------------------------ */
41 41
42static unsigned long console_pfn = ~0ul;
43
42static inline struct xencons_interface *xencons_interface(void) 44static inline struct xencons_interface *xencons_interface(void)
43{ 45{
44 return mfn_to_virt(xen_start_info->console.domU.mfn); 46 if (console_pfn == ~0ul)
47 return mfn_to_virt(xen_start_info->console.domU.mfn);
48 else
49 return __va(console_pfn << PAGE_SHIFT);
45} 50}
46 51
47static inline void notify_daemon(void) 52static inline void notify_daemon(void)
@@ -101,20 +106,32 @@ static int __init xen_init(void)
101{ 106{
102 struct hvc_struct *hp; 107 struct hvc_struct *hp;
103 108
104 if (!is_running_on_xen()) 109 if (!is_running_on_xen() ||
105 return 0; 110 is_initial_xendomain() ||
111 !xen_start_info->console.domU.evtchn)
112 return -ENODEV;
106 113
107 xencons_irq = bind_evtchn_to_irq(xen_start_info->console.domU.evtchn); 114 xencons_irq = bind_evtchn_to_irq(xen_start_info->console.domU.evtchn);
108 if (xencons_irq < 0) 115 if (xencons_irq < 0)
109 xencons_irq = 0 /* NO_IRQ */; 116 xencons_irq = 0; /* NO_IRQ */
117
110 hp = hvc_alloc(HVC_COOKIE, xencons_irq, &hvc_ops, 256); 118 hp = hvc_alloc(HVC_COOKIE, xencons_irq, &hvc_ops, 256);
111 if (IS_ERR(hp)) 119 if (IS_ERR(hp))
112 return PTR_ERR(hp); 120 return PTR_ERR(hp);
113 121
114 hvc = hp; 122 hvc = hp;
123
124 console_pfn = mfn_to_pfn(xen_start_info->console.domU.mfn);
125
115 return 0; 126 return 0;
116} 127}
117 128
129void xen_console_resume(void)
130{
131 if (xencons_irq)
132 rebind_evtchn_irq(xen_start_info->console.domU.evtchn, xencons_irq);
133}
134
118static void __exit xen_fini(void) 135static void __exit xen_fini(void)
119{ 136{
120 if (hvc) 137 if (hvc)
@@ -134,12 +151,28 @@ module_init(xen_init);
134module_exit(xen_fini); 151module_exit(xen_fini);
135console_initcall(xen_cons_init); 152console_initcall(xen_cons_init);
136 153
154static void raw_console_write(const char *str, int len)
155{
 156	while (len > 0) {
157 int rc = HYPERVISOR_console_io(CONSOLEIO_write, len, (char *)str);
158 if (rc <= 0)
159 break;
160
161 str += rc;
162 len -= rc;
163 }
164}
165
166#ifdef CONFIG_EARLY_PRINTK
137static void xenboot_write_console(struct console *console, const char *string, 167static void xenboot_write_console(struct console *console, const char *string,
138 unsigned len) 168 unsigned len)
139{ 169{
140 unsigned int linelen, off = 0; 170 unsigned int linelen, off = 0;
141 const char *pos; 171 const char *pos;
142 172
173 raw_console_write(string, len);
174
175 write_console(0, "(early) ", 8);
143 while (off < len && NULL != (pos = strchr(string+off, '\n'))) { 176 while (off < len && NULL != (pos = strchr(string+off, '\n'))) {
144 linelen = pos-string+off; 177 linelen = pos-string+off;
145 if (off + linelen > len) 178 if (off + linelen > len)
@@ -155,5 +188,23 @@ static void xenboot_write_console(struct console *console, const char *string,
155struct console xenboot_console = { 188struct console xenboot_console = {
156 .name = "xenboot", 189 .name = "xenboot",
157 .write = xenboot_write_console, 190 .write = xenboot_write_console,
158 .flags = CON_PRINTBUFFER | CON_BOOT, 191 .flags = CON_PRINTBUFFER | CON_BOOT | CON_ANYTIME,
159}; 192};
193#endif /* CONFIG_EARLY_PRINTK */
194
195void xen_raw_console_write(const char *str)
196{
197 raw_console_write(str, strlen(str));
198}
199
200void xen_raw_printk(const char *fmt, ...)
201{
202 static char buf[512];
203 va_list ap;
204
205 va_start(ap, fmt);
206 vsnprintf(buf, sizeof(buf), fmt, ap);
207 va_end(ap);
208
209 xen_raw_console_write(buf);
210}
diff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig
index dc2cec6127d1..ebb9e51deb0c 100644
--- a/drivers/firmware/Kconfig
+++ b/drivers/firmware/Kconfig
@@ -26,6 +26,16 @@ config EDD_OFF
26 kernel. Say N if you want EDD enabled by default. EDD can be dynamically set 26 kernel. Say N if you want EDD enabled by default. EDD can be dynamically set
27 using the kernel parameter 'edd={on|skipmbr|off}'. 27 using the kernel parameter 'edd={on|skipmbr|off}'.
28 28
29config FIRMWARE_MEMMAP
30 bool "Add firmware-provided memory map to sysfs" if EMBEDDED
31 default (X86_64 || X86_32)
32 help
33 Add the firmware-provided (unmodified) memory map to /sys/firmware/memmap.
 34	  That memory map is used, for example, by kexec to set up the parameter
 35	  area for the next kernel, but it can also be used for debugging purposes.
36
37 See also Documentation/ABI/testing/sysfs-firmware-memmap.
38
29config EFI_VARS 39config EFI_VARS
30 tristate "EFI Variable Support via sysfs" 40 tristate "EFI Variable Support via sysfs"
31 depends on EFI 41 depends on EFI
diff --git a/drivers/firmware/Makefile b/drivers/firmware/Makefile
index 4c9147154df8..1c3c17343dbe 100644
--- a/drivers/firmware/Makefile
+++ b/drivers/firmware/Makefile
@@ -10,3 +10,4 @@ obj-$(CONFIG_DCDBAS) += dcdbas.o
10obj-$(CONFIG_DMIID) += dmi-id.o 10obj-$(CONFIG_DMIID) += dmi-id.o
11obj-$(CONFIG_ISCSI_IBFT_FIND) += iscsi_ibft_find.o 11obj-$(CONFIG_ISCSI_IBFT_FIND) += iscsi_ibft_find.o
12obj-$(CONFIG_ISCSI_IBFT) += iscsi_ibft.o 12obj-$(CONFIG_ISCSI_IBFT) += iscsi_ibft.o
13obj-$(CONFIG_FIRMWARE_MEMMAP) += memmap.o
diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c
index c5e3ed7e903b..455575be3560 100644
--- a/drivers/firmware/dmi_scan.c
+++ b/drivers/firmware/dmi_scan.c
@@ -8,6 +8,11 @@
8#include <linux/slab.h> 8#include <linux/slab.h>
9#include <asm/dmi.h> 9#include <asm/dmi.h>
10 10
11/*
12 * DMI stands for "Desktop Management Interface". It is part
 13 * of, and an antecedent to, SMBIOS, which stands for System
14 * Management BIOS. See further: http://www.dmtf.org/standards
15 */
11static char dmi_empty_string[] = " "; 16static char dmi_empty_string[] = " ";
12 17
13static const char * __init dmi_string_nosave(const struct dmi_header *dm, u8 s) 18static const char * __init dmi_string_nosave(const struct dmi_header *dm, u8 s)
diff --git a/drivers/firmware/memmap.c b/drivers/firmware/memmap.c
new file mode 100644
index 000000000000..e23399c7f773
--- /dev/null
+++ b/drivers/firmware/memmap.c
@@ -0,0 +1,205 @@
1/*
2 * linux/drivers/firmware/memmap.c
3 * Copyright (C) 2008 SUSE LINUX Products GmbH
4 * by Bernhard Walle <bwalle@suse.de>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License v2.0 as published by
8 * the Free Software Foundation
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 */
16
17#include <linux/string.h>
18#include <linux/firmware-map.h>
19#include <linux/kernel.h>
20#include <linux/module.h>
21#include <linux/types.h>
22#include <linux/bootmem.h>
23
24/*
25 * Data types ------------------------------------------------------------------
26 */
27
28/*
29 * Firmware map entry. Because firmware memory maps are flat and not
30 * hierarchical, it's ok to organise them in a linked list. No parent
 31 * information is necessary, unlike in the resource tree.
32 */
33struct firmware_map_entry {
34 resource_size_t start; /* start of the memory range */
35 resource_size_t end; /* end of the memory range (incl.) */
36 const char *type; /* type of the memory range */
37 struct list_head list; /* entry for the linked list */
38 struct kobject kobj; /* kobject for each entry */
39};
40
41/*
42 * Forward declarations --------------------------------------------------------
43 */
44static ssize_t memmap_attr_show(struct kobject *kobj,
45 struct attribute *attr, char *buf);
46static ssize_t start_show(struct firmware_map_entry *entry, char *buf);
47static ssize_t end_show(struct firmware_map_entry *entry, char *buf);
48static ssize_t type_show(struct firmware_map_entry *entry, char *buf);
49
50/*
51 * Static data -----------------------------------------------------------------
52 */
53
54struct memmap_attribute {
55 struct attribute attr;
56 ssize_t (*show)(struct firmware_map_entry *entry, char *buf);
57};
58
59struct memmap_attribute memmap_start_attr = __ATTR_RO(start);
60struct memmap_attribute memmap_end_attr = __ATTR_RO(end);
61struct memmap_attribute memmap_type_attr = __ATTR_RO(type);
62
63/*
64 * These are default attributes that are added for every memmap entry.
65 */
66static struct attribute *def_attrs[] = {
67 &memmap_start_attr.attr,
68 &memmap_end_attr.attr,
69 &memmap_type_attr.attr,
70 NULL
71};
72
73static struct sysfs_ops memmap_attr_ops = {
74 .show = memmap_attr_show,
75};
76
77static struct kobj_type memmap_ktype = {
78 .sysfs_ops = &memmap_attr_ops,
79 .default_attrs = def_attrs,
80};
81
82/*
83 * Registration functions ------------------------------------------------------
84 */
85
86/*
87 * Firmware memory map entries
88 */
89static LIST_HEAD(map_entries);
90
91/**
92 * Common implementation of firmware_map_add() and firmware_map_add_early()
93 * which expects a pre-allocated struct firmware_map_entry.
94 *
95 * @start: Start of the memory range.
96 * @end: End of the memory range (inclusive).
97 * @type: Type of the memory range.
98 * @entry: Pre-allocated (either kmalloc() or bootmem allocator), uninitialised
99 * entry.
100 */
101static int firmware_map_add_entry(resource_size_t start, resource_size_t end,
102 const char *type,
103 struct firmware_map_entry *entry)
104{
105 BUG_ON(start > end);
106
107 entry->start = start;
108 entry->end = end;
109 entry->type = type;
110 INIT_LIST_HEAD(&entry->list);
111 kobject_init(&entry->kobj, &memmap_ktype);
112
113 list_add_tail(&entry->list, &map_entries);
114
115 return 0;
116}
117
118/*
119 * See <linux/firmware-map.h> for documentation.
120 */
121int firmware_map_add(resource_size_t start, resource_size_t end,
122 const char *type)
123{
124 struct firmware_map_entry *entry;
125
126 entry = kmalloc(sizeof(struct firmware_map_entry), GFP_ATOMIC);
127 WARN_ON(!entry);
128 if (!entry)
129 return -ENOMEM;
130
131 return firmware_map_add_entry(start, end, type, entry);
132}
133
134/*
135 * See <linux/firmware-map.h> for documentation.
136 */
137int __init firmware_map_add_early(resource_size_t start, resource_size_t end,
138 const char *type)
139{
140 struct firmware_map_entry *entry;
141
142 entry = alloc_bootmem_low(sizeof(struct firmware_map_entry));
143 WARN_ON(!entry);
144 if (!entry)
145 return -ENOMEM;
146
147 return firmware_map_add_entry(start, end, type, entry);
148}
149
150/*
151 * Sysfs functions -------------------------------------------------------------
152 */
153
154static ssize_t start_show(struct firmware_map_entry *entry, char *buf)
155{
 156	return snprintf(buf, PAGE_SIZE, "0x%llx\n",
 		(unsigned long long)entry->start);
157}
158
159static ssize_t end_show(struct firmware_map_entry *entry, char *buf)
160{
 161	return snprintf(buf, PAGE_SIZE, "0x%llx\n",
 		(unsigned long long)entry->end);
162}
163
164static ssize_t type_show(struct firmware_map_entry *entry, char *buf)
165{
166 return snprintf(buf, PAGE_SIZE, "%s\n", entry->type);
167}
168
169#define to_memmap_attr(_attr) container_of(_attr, struct memmap_attribute, attr)
170#define to_memmap_entry(obj) container_of(obj, struct firmware_map_entry, kobj)
171
172static ssize_t memmap_attr_show(struct kobject *kobj,
173 struct attribute *attr, char *buf)
174{
175 struct firmware_map_entry *entry = to_memmap_entry(kobj);
176 struct memmap_attribute *memmap_attr = to_memmap_attr(attr);
177
178 return memmap_attr->show(entry, buf);
179}
180
181/*
 182 * Initialise the kset and add the entries in the map_entries list to
 183 * sysfs. Note that firmware_map_add() and firmware_map_add_early()
 184 * must be called before late_initcall.
185 */
186static int __init memmap_init(void)
187{
188 int i = 0;
189 struct firmware_map_entry *entry;
190 struct kset *memmap_kset;
191
192 memmap_kset = kset_create_and_add("memmap", NULL, firmware_kobj);
193 WARN_ON(!memmap_kset);
194 if (!memmap_kset)
195 return -ENOMEM;
196
197 list_for_each_entry(entry, &map_entries, list) {
198 entry->kobj.kset = memmap_kset;
199 kobject_add(&entry->kobj, NULL, "%d", i++);
200 }
201
202 return 0;
203}
204late_initcall(memmap_init);
205
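The resulting sysfs layout is /sys/firmware/memmap/<n>/{start,end,type}, with <n> assigned by the kobject_add(..., "%d", i++) above. A small userspace reader, as a sketch with minimal error handling:

	#include <stdio.h>
	#include <string.h>

	/* Read one attribute of /sys/firmware/memmap/<n>/; names match the
	 * kobject_add() numbering and __ATTR_RO() definitions above. */
	static int read_attr(int i, const char *attr, char *buf, size_t len)
	{
		char path[64];
		FILE *f;
		char *ok;

		snprintf(path, sizeof(path),
			 "/sys/firmware/memmap/%d/%s", i, attr);
		f = fopen(path, "r");
		if (!f)
			return 0;		/* no more entries */
		ok = fgets(buf, (int)len, f);
		fclose(f);
		if (ok)
			buf[strcspn(buf, "\n")] = '\0';
		return ok != NULL;
	}

	int main(void)
	{
		char start[32], end[32], type[64];
		int i;

		for (i = 0; read_attr(i, "start", start, sizeof(start)); i++) {
			if (read_attr(i, "end", end, sizeof(end)) &&
			    read_attr(i, "type", type, sizeof(type)))
				printf("%d: %s-%s %s\n", i, start, end, type);
		}
		return 0;
	}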
diff --git a/drivers/input/xen-kbdfront.c b/drivers/input/xen-kbdfront.c
index 0f47f4697cdf..9ce3b3baf3a2 100644
--- a/drivers/input/xen-kbdfront.c
+++ b/drivers/input/xen-kbdfront.c
@@ -66,6 +66,9 @@ static irqreturn_t input_handler(int rq, void *dev_id)
66 case XENKBD_TYPE_MOTION: 66 case XENKBD_TYPE_MOTION:
67 input_report_rel(dev, REL_X, event->motion.rel_x); 67 input_report_rel(dev, REL_X, event->motion.rel_x);
68 input_report_rel(dev, REL_Y, event->motion.rel_y); 68 input_report_rel(dev, REL_Y, event->motion.rel_y);
69 if (event->motion.rel_z)
70 input_report_rel(dev, REL_WHEEL,
71 -event->motion.rel_z);
69 break; 72 break;
70 case XENKBD_TYPE_KEY: 73 case XENKBD_TYPE_KEY:
71 dev = NULL; 74 dev = NULL;
@@ -84,6 +87,9 @@ static irqreturn_t input_handler(int rq, void *dev_id)
84 case XENKBD_TYPE_POS: 87 case XENKBD_TYPE_POS:
85 input_report_abs(dev, ABS_X, event->pos.abs_x); 88 input_report_abs(dev, ABS_X, event->pos.abs_x);
86 input_report_abs(dev, ABS_Y, event->pos.abs_y); 89 input_report_abs(dev, ABS_Y, event->pos.abs_y);
90 if (event->pos.rel_z)
91 input_report_rel(dev, REL_WHEEL,
92 -event->pos.rel_z);
87 break; 93 break;
88 } 94 }
89 if (dev) 95 if (dev)
@@ -152,7 +158,7 @@ static int __devinit xenkbd_probe(struct xenbus_device *dev,
152 ptr->evbit[0] = BIT(EV_KEY) | BIT(EV_REL) | BIT(EV_ABS); 158 ptr->evbit[0] = BIT(EV_KEY) | BIT(EV_REL) | BIT(EV_ABS);
153 for (i = BTN_LEFT; i <= BTN_TASK; i++) 159 for (i = BTN_LEFT; i <= BTN_TASK; i++)
154 set_bit(i, ptr->keybit); 160 set_bit(i, ptr->keybit);
155 ptr->relbit[0] = BIT(REL_X) | BIT(REL_Y); 161 ptr->relbit[0] = BIT(REL_X) | BIT(REL_Y) | BIT(REL_WHEEL);
156 input_set_abs_params(ptr, ABS_X, 0, XENFB_WIDTH, 0, 0); 162 input_set_abs_params(ptr, ABS_X, 0, XENFB_WIDTH, 0, 0);
157 input_set_abs_params(ptr, ABS_Y, 0, XENFB_HEIGHT, 0, 0); 163 input_set_abs_params(ptr, ABS_Y, 0, XENFB_HEIGHT, 0, 0);
158 164
@@ -294,6 +300,16 @@ InitWait:
294 */ 300 */
295 if (dev->state != XenbusStateConnected) 301 if (dev->state != XenbusStateConnected)
296 goto InitWait; /* no InitWait seen yet, fudge it */ 302 goto InitWait; /* no InitWait seen yet, fudge it */
303
304 /* Set input abs params to match backend screen res */
305 if (xenbus_scanf(XBT_NIL, info->xbdev->otherend,
306 "width", "%d", &val) > 0)
307 input_set_abs_params(info->ptr, ABS_X, 0, val, 0, 0);
308
309 if (xenbus_scanf(XBT_NIL, info->xbdev->otherend,
310 "height", "%d", &val) > 0)
311 input_set_abs_params(info->ptr, ABS_Y, 0, val, 0, 0);
312
297 break; 313 break;
298 314
299 case XenbusStateClosing: 315 case XenbusStateClosing:
@@ -337,4 +353,6 @@ static void __exit xenkbd_cleanup(void)
337module_init(xenkbd_init); 353module_init(xenkbd_init);
338module_exit(xenkbd_cleanup); 354module_exit(xenkbd_cleanup);
339 355
356MODULE_DESCRIPTION("Xen virtual keyboard/pointer device frontend");
340MODULE_LICENSE("GPL"); 357MODULE_LICENSE("GPL");
358MODULE_ALIAS("xen:vkbd");
diff --git a/drivers/lguest/Kconfig b/drivers/lguest/Kconfig
index 6b8dbb9ba73b..76f2b36881c3 100644
--- a/drivers/lguest/Kconfig
+++ b/drivers/lguest/Kconfig
@@ -1,6 +1,6 @@
1config LGUEST 1config LGUEST
2 tristate "Linux hypervisor example code" 2 tristate "Linux hypervisor example code"
3 depends on X86_32 && EXPERIMENTAL && !X86_PAE && FUTEX && !(X86_VISWS || X86_VOYAGER) 3 depends on X86_32 && EXPERIMENTAL && !X86_PAE && FUTEX && !X86_VOYAGER
4 select HVC_DRIVER 4 select HVC_DRIVER
5 ---help--- 5 ---help---
6 This is a very simple module which allows you to run 6 This is a very simple module which allows you to run
diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h
index 005bd045d2eb..5faefeaf6790 100644
--- a/drivers/lguest/lg.h
+++ b/drivers/lguest/lg.h
@@ -136,7 +136,6 @@ int run_guest(struct lg_cpu *cpu, unsigned long __user *user);
136 * first step in the migration to the kernel types. pte_pfn is already defined 136 * first step in the migration to the kernel types. pte_pfn is already defined
137 * in the kernel. */ 137 * in the kernel. */
138#define pgd_flags(x) (pgd_val(x) & ~PAGE_MASK) 138#define pgd_flags(x) (pgd_val(x) & ~PAGE_MASK)
139#define pte_flags(x) (pte_val(x) & ~PAGE_MASK)
140#define pgd_pfn(x) (pgd_val(x) >> PAGE_SHIFT) 139#define pgd_pfn(x) (pgd_val(x) >> PAGE_SHIFT)
141 140
142/* interrupts_and_traps.c: */ 141/* interrupts_and_traps.c: */
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 66c0fd21894b..bb0642318a95 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -1637,12 +1637,43 @@ static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
1637} 1637}
1638 1638
1639#ifdef CONFIG_DMAR_GFX_WA 1639#ifdef CONFIG_DMAR_GFX_WA
1640extern int arch_get_ram_range(int slot, u64 *addr, u64 *size); 1640struct iommu_prepare_data {
1641 struct pci_dev *pdev;
1642 int ret;
1643};
1644
1645static int __init iommu_prepare_work_fn(unsigned long start_pfn,
1646 unsigned long end_pfn, void *datax)
1647{
1648 struct iommu_prepare_data *data;
1649
1650 data = (struct iommu_prepare_data *)datax;
1651
1652 data->ret = iommu_prepare_identity_map(data->pdev,
1653 start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
1654 return data->ret;
1655
1656}
1657
1658static int __init iommu_prepare_with_active_regions(struct pci_dev *pdev)
1659{
1660 int nid;
1661 struct iommu_prepare_data data;
1662
1663 data.pdev = pdev;
1664 data.ret = 0;
1665
1666 for_each_online_node(nid) {
1667 work_with_active_regions(nid, iommu_prepare_work_fn, &data);
1668 if (data.ret)
1669 return data.ret;
1670 }
1671 return data.ret;
1672}
1673
1641static void __init iommu_prepare_gfx_mapping(void) 1674static void __init iommu_prepare_gfx_mapping(void)
1642{ 1675{
1643 struct pci_dev *pdev = NULL; 1676 struct pci_dev *pdev = NULL;
1644 u64 base, size;
1645 int slot;
1646 int ret; 1677 int ret;
1647 1678
1648 for_each_pci_dev(pdev) { 1679 for_each_pci_dev(pdev) {
@@ -1651,17 +1682,9 @@ static void __init iommu_prepare_gfx_mapping(void)
1651 continue; 1682 continue;
1652 printk(KERN_INFO "IOMMU: gfx device %s 1-1 mapping\n", 1683 printk(KERN_INFO "IOMMU: gfx device %s 1-1 mapping\n",
1653 pci_name(pdev)); 1684 pci_name(pdev));
1654 slot = arch_get_ram_range(0, &base, &size); 1685 ret = iommu_prepare_with_active_regions(pdev);
1655 while (slot >= 0) { 1686 if (ret)
1656 ret = iommu_prepare_identity_map(pdev, 1687 printk(KERN_ERR "IOMMU: mapping reserved region failed\n");
1657 base, base + size);
1658 if (ret)
1659 goto error;
1660 slot = arch_get_ram_range(slot, &base, &size);
1661 }
1662 continue;
1663error:
1664 printk(KERN_ERR "IOMMU: mapping reserved region failed\n");
1665 } 1688 }
1666} 1689}
1667#endif 1690#endif
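iommu_prepare_with_active_regions() leans on the generic work_with_active_regions() iterator, which walks a node's usable RAM ranges and invokes the callback once per range; a nonzero return from the callback is assumed to end the walk early, which is why iommu_prepare_work_fn() returns data->ret. The callback shape, for reference:

	/* Illustrative callback, not part of this patch. */
	static int count_pages_fn(unsigned long start_pfn, unsigned long end_pfn,
				  void *datax)
	{
		unsigned long *total = datax;

		*total += end_pfn - start_pfn;	/* pages in this active range */
		return 0;			/* 0 = keep iterating */
	}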
diff --git a/drivers/video/sgivwfb.c b/drivers/video/sgivwfb.c
index 4fb16240c04d..f5252c2552fd 100644
--- a/drivers/video/sgivwfb.c
+++ b/drivers/video/sgivwfb.c
@@ -21,8 +21,7 @@
21 21
22#include <asm/io.h> 22#include <asm/io.h>
23#include <asm/mtrr.h> 23#include <asm/mtrr.h>
24 24#include <asm/visws/sgivw.h>
25#include <setup_arch.h>
26 25
27#define INCLUDE_TIMING_TABLE_DATA 26#define INCLUDE_TIMING_TABLE_DATA
28#define DBE_REG_BASE par->regs 27#define DBE_REG_BASE par->regs
diff --git a/drivers/video/xen-fbfront.c b/drivers/video/xen-fbfront.c
index 619a6f8d65a2..47ed39b52f9c 100644
--- a/drivers/video/xen-fbfront.c
+++ b/drivers/video/xen-fbfront.c
@@ -18,6 +18,7 @@
18 * frame buffer. 18 * frame buffer.
19 */ 19 */
20 20
21#include <linux/console.h>
21#include <linux/kernel.h> 22#include <linux/kernel.h>
22#include <linux/errno.h> 23#include <linux/errno.h>
23#include <linux/fb.h> 24#include <linux/fb.h>
@@ -42,37 +43,68 @@ struct xenfb_info {
42 struct xenfb_page *page; 43 struct xenfb_page *page;
43 unsigned long *mfns; 44 unsigned long *mfns;
44 int update_wanted; /* XENFB_TYPE_UPDATE wanted */ 45 int update_wanted; /* XENFB_TYPE_UPDATE wanted */
46 int feature_resize; /* XENFB_TYPE_RESIZE ok */
47 struct xenfb_resize resize; /* protected by resize_lock */
48 int resize_dpy; /* ditto */
49 spinlock_t resize_lock;
45 50
46 struct xenbus_device *xbdev; 51 struct xenbus_device *xbdev;
47}; 52};
48 53
49static u32 xenfb_mem_len = XENFB_WIDTH * XENFB_HEIGHT * XENFB_DEPTH / 8; 54#define XENFB_DEFAULT_FB_LEN (XENFB_WIDTH * XENFB_HEIGHT * XENFB_DEPTH / 8)
50 55
56enum { KPARAM_MEM, KPARAM_WIDTH, KPARAM_HEIGHT, KPARAM_CNT };
57static int video[KPARAM_CNT] = { 2, XENFB_WIDTH, XENFB_HEIGHT };
58module_param_array(video, int, NULL, 0);
59MODULE_PARM_DESC(video,
60 "Video memory size in MB, width, height in pixels (default 2,800,600)");
61
62static void xenfb_make_preferred_console(void);
51static int xenfb_remove(struct xenbus_device *); 63static int xenfb_remove(struct xenbus_device *);
52static void xenfb_init_shared_page(struct xenfb_info *); 64static void xenfb_init_shared_page(struct xenfb_info *, struct fb_info *);
53static int xenfb_connect_backend(struct xenbus_device *, struct xenfb_info *); 65static int xenfb_connect_backend(struct xenbus_device *, struct xenfb_info *);
54static void xenfb_disconnect_backend(struct xenfb_info *); 66static void xenfb_disconnect_backend(struct xenfb_info *);
55 67
68static void xenfb_send_event(struct xenfb_info *info,
69 union xenfb_out_event *event)
70{
71 u32 prod;
72
73 prod = info->page->out_prod;
74 /* caller ensures !xenfb_queue_full() */
75 mb(); /* ensure ring space available */
76 XENFB_OUT_RING_REF(info->page, prod) = *event;
77 wmb(); /* ensure ring contents visible */
78 info->page->out_prod = prod + 1;
79
80 notify_remote_via_irq(info->irq);
81}
82
56static void xenfb_do_update(struct xenfb_info *info, 83static void xenfb_do_update(struct xenfb_info *info,
57 int x, int y, int w, int h) 84 int x, int y, int w, int h)
58{ 85{
59 union xenfb_out_event event; 86 union xenfb_out_event event;
60 u32 prod;
61 87
88 memset(&event, 0, sizeof(event));
62 event.type = XENFB_TYPE_UPDATE; 89 event.type = XENFB_TYPE_UPDATE;
63 event.update.x = x; 90 event.update.x = x;
64 event.update.y = y; 91 event.update.y = y;
65 event.update.width = w; 92 event.update.width = w;
66 event.update.height = h; 93 event.update.height = h;
67 94
68 prod = info->page->out_prod;
69 /* caller ensures !xenfb_queue_full() */ 95 /* caller ensures !xenfb_queue_full() */
70 mb(); /* ensure ring space available */ 96 xenfb_send_event(info, &event);
71 XENFB_OUT_RING_REF(info->page, prod) = event; 97}
72 wmb(); /* ensure ring contents visible */
73 info->page->out_prod = prod + 1;
74 98
75 notify_remote_via_irq(info->irq); 99static void xenfb_do_resize(struct xenfb_info *info)
100{
101 union xenfb_out_event event;
102
103 memset(&event, 0, sizeof(event));
104 event.resize = info->resize;
105
106 /* caller ensures !xenfb_queue_full() */
107 xenfb_send_event(info, &event);
76} 108}
77 109
78static int xenfb_queue_full(struct xenfb_info *info) 110static int xenfb_queue_full(struct xenfb_info *info)
@@ -84,12 +116,28 @@ static int xenfb_queue_full(struct xenfb_info *info)
84 return prod - cons == XENFB_OUT_RING_LEN; 116 return prod - cons == XENFB_OUT_RING_LEN;
85} 117}
86 118
119static void xenfb_handle_resize_dpy(struct xenfb_info *info)
120{
121 unsigned long flags;
122
123 spin_lock_irqsave(&info->resize_lock, flags);
124 if (info->resize_dpy) {
125 if (!xenfb_queue_full(info)) {
126 info->resize_dpy = 0;
127 xenfb_do_resize(info);
128 }
129 }
130 spin_unlock_irqrestore(&info->resize_lock, flags);
131}
132
87static void xenfb_refresh(struct xenfb_info *info, 133static void xenfb_refresh(struct xenfb_info *info,
88 int x1, int y1, int w, int h) 134 int x1, int y1, int w, int h)
89{ 135{
90 unsigned long flags; 136 unsigned long flags;
91 int y2 = y1 + h - 1;
92 int x2 = x1 + w - 1; 137 int x2 = x1 + w - 1;
138 int y2 = y1 + h - 1;
139
140 xenfb_handle_resize_dpy(info);
93 141
94 if (!info->update_wanted) 142 if (!info->update_wanted)
95 return; 143 return;
@@ -222,6 +270,57 @@ static ssize_t xenfb_write(struct fb_info *p, const char __user *buf,
222 return res; 270 return res;
223} 271}
224 272
273static int
274xenfb_check_var(struct fb_var_screeninfo *var, struct fb_info *info)
275{
276 struct xenfb_info *xenfb_info;
277 int required_mem_len;
278
279 xenfb_info = info->par;
280
281 if (!xenfb_info->feature_resize) {
282 if (var->xres == video[KPARAM_WIDTH] &&
283 var->yres == video[KPARAM_HEIGHT] &&
284 var->bits_per_pixel == xenfb_info->page->depth) {
285 return 0;
286 }
287 return -EINVAL;
288 }
289
290 /* Can't resize past initial width and height */
291 if (var->xres > video[KPARAM_WIDTH] || var->yres > video[KPARAM_HEIGHT])
292 return -EINVAL;
293
294 required_mem_len = var->xres * var->yres * xenfb_info->page->depth / 8;
295 if (var->bits_per_pixel == xenfb_info->page->depth &&
296 var->xres <= info->fix.line_length / (XENFB_DEPTH / 8) &&
297 required_mem_len <= info->fix.smem_len) {
298 var->xres_virtual = var->xres;
299 var->yres_virtual = var->yres;
300 return 0;
301 }
302 return -EINVAL;
303}
304
305static int xenfb_set_par(struct fb_info *info)
306{
307 struct xenfb_info *xenfb_info;
308 unsigned long flags;
309
310 xenfb_info = info->par;
311
312 spin_lock_irqsave(&xenfb_info->resize_lock, flags);
313 xenfb_info->resize.type = XENFB_TYPE_RESIZE;
314 xenfb_info->resize.width = info->var.xres;
315 xenfb_info->resize.height = info->var.yres;
316 xenfb_info->resize.stride = info->fix.line_length;
317 xenfb_info->resize.depth = info->var.bits_per_pixel;
318 xenfb_info->resize.offset = 0;
319 xenfb_info->resize_dpy = 1;
320 spin_unlock_irqrestore(&xenfb_info->resize_lock, flags);
321 return 0;
322}
323
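Taken together, the resize path added here is deliberately asynchronous: xenfb_set_par() only records the request under resize_lock, and the ring event goes out on the next screen update, once there is room. The call flow as wired up above:

	/*
	 * ioctl(FBIOPUT_VSCREENINFO)
	 *   -> xenfb_check_var()          validate against initial mode/memory
	 *   -> xenfb_set_par()            record request, set resize_dpy = 1
	 * ...later...
	 * xenfb_refresh()                 on the next dirty-region update
	 *   -> xenfb_handle_resize_dpy()  under resize_lock
	 *     -> xenfb_do_resize()        send XENFB_TYPE_RESIZE if ring not full
	 */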
225static struct fb_ops xenfb_fb_ops = { 324static struct fb_ops xenfb_fb_ops = {
226 .owner = THIS_MODULE, 325 .owner = THIS_MODULE,
227 .fb_read = fb_sys_read, 326 .fb_read = fb_sys_read,
@@ -230,6 +329,8 @@ static struct fb_ops xenfb_fb_ops = {
230 .fb_fillrect = xenfb_fillrect, 329 .fb_fillrect = xenfb_fillrect,
231 .fb_copyarea = xenfb_copyarea, 330 .fb_copyarea = xenfb_copyarea,
232 .fb_imageblit = xenfb_imageblit, 331 .fb_imageblit = xenfb_imageblit,
332 .fb_check_var = xenfb_check_var,
333 .fb_set_par = xenfb_set_par,
233}; 334};
234 335
235static irqreturn_t xenfb_event_handler(int rq, void *dev_id) 336static irqreturn_t xenfb_event_handler(int rq, void *dev_id)
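
The new fb_check_var/fb_set_par pair is what the fbdev core invokes on a mode change request: check_var validates the geometry against the initial size and available memory, while set_par only records the new mode under resize_lock and raises resize_dpy; the actual XENFB_TYPE_RESIZE event goes out later from xenfb_refresh once ring space is available. A hedged userspace sketch of the trigger path (device node and target geometry are assumptions for illustration):

#include <fcntl.h>
#include <linux/fb.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>

int main(void)
{
    struct fb_var_screeninfo var;
    int fd = open("/dev/fb0", O_RDWR);  /* assumed xenfb device node */

    if (fd < 0 || ioctl(fd, FBIOGET_VSCREENINFO, &var) < 0) {
        perror("fb");
        return 1;
    }

    var.xres = var.xres_virtual = 640;  /* must not exceed initial size */
    var.yres = var.yres_virtual = 480;
    /* the fbdev core runs fb_check_var, then fb_set_par on activation */
    if (ioctl(fd, FBIOPUT_VSCREENINFO, &var) < 0)
        perror("FBIOPUT_VSCREENINFO");

    close(fd);
    return 0;
}
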
@@ -258,6 +359,8 @@ static int __devinit xenfb_probe(struct xenbus_device *dev,
258{ 359{
259 struct xenfb_info *info; 360 struct xenfb_info *info;
260 struct fb_info *fb_info; 361 struct fb_info *fb_info;
362 int fb_size;
363 int val;
261 int ret; 364 int ret;
262 365
263 info = kzalloc(sizeof(*info), GFP_KERNEL); 366 info = kzalloc(sizeof(*info), GFP_KERNEL);
@@ -265,18 +368,35 @@ static int __devinit xenfb_probe(struct xenbus_device *dev,
265 xenbus_dev_fatal(dev, -ENOMEM, "allocating info structure"); 368 xenbus_dev_fatal(dev, -ENOMEM, "allocating info structure");
266 return -ENOMEM; 369 return -ENOMEM;
267 } 370 }
371
372 /* Limit kernel param videoram amount to what is in xenstore */
373 if (xenbus_scanf(XBT_NIL, dev->otherend, "videoram", "%d", &val) == 1) {
374 if (val < video[KPARAM_MEM])
375 video[KPARAM_MEM] = val;
376 }
377
378 /* If requested res does not fit in available memory, use default */
379 fb_size = video[KPARAM_MEM] * 1024 * 1024;
380 if (video[KPARAM_WIDTH] * video[KPARAM_HEIGHT] * XENFB_DEPTH / 8
381 > fb_size) {
382 video[KPARAM_WIDTH] = XENFB_WIDTH;
383 video[KPARAM_HEIGHT] = XENFB_HEIGHT;
384 fb_size = XENFB_DEFAULT_FB_LEN;
385 }
386
268 dev->dev.driver_data = info; 387 dev->dev.driver_data = info;
269 info->xbdev = dev; 388 info->xbdev = dev;
270 info->irq = -1; 389 info->irq = -1;
271 info->x1 = info->y1 = INT_MAX; 390 info->x1 = info->y1 = INT_MAX;
272 spin_lock_init(&info->dirty_lock); 391 spin_lock_init(&info->dirty_lock);
392 spin_lock_init(&info->resize_lock);
273 393
274 info->fb = vmalloc(xenfb_mem_len); 394 info->fb = vmalloc(fb_size);
275 if (info->fb == NULL) 395 if (info->fb == NULL)
276 goto error_nomem; 396 goto error_nomem;
277 memset(info->fb, 0, xenfb_mem_len); 397 memset(info->fb, 0, fb_size);
278 398
279 info->nr_pages = (xenfb_mem_len + PAGE_SIZE - 1) >> PAGE_SHIFT; 399 info->nr_pages = (fb_size + PAGE_SIZE - 1) >> PAGE_SHIFT;
280 400
281 info->mfns = vmalloc(sizeof(unsigned long) * info->nr_pages); 401 info->mfns = vmalloc(sizeof(unsigned long) * info->nr_pages);
282 if (!info->mfns) 402 if (!info->mfns)
@@ -287,8 +407,6 @@ static int __devinit xenfb_probe(struct xenbus_device *dev,
287 if (!info->page) 407 if (!info->page)
288 goto error_nomem; 408 goto error_nomem;
289 409
290 xenfb_init_shared_page(info);
291
292 /* abusing framebuffer_alloc() to allocate pseudo_palette */ 410 /* abusing framebuffer_alloc() to allocate pseudo_palette */
293 fb_info = framebuffer_alloc(sizeof(u32) * 256, NULL); 411 fb_info = framebuffer_alloc(sizeof(u32) * 256, NULL);
294 if (fb_info == NULL) 412 if (fb_info == NULL)
@@ -301,9 +419,9 @@ static int __devinit xenfb_probe(struct xenbus_device *dev,
301 fb_info->screen_base = info->fb; 419 fb_info->screen_base = info->fb;
302 420
303 fb_info->fbops = &xenfb_fb_ops; 421 fb_info->fbops = &xenfb_fb_ops;
304 fb_info->var.xres_virtual = fb_info->var.xres = info->page->width; 422 fb_info->var.xres_virtual = fb_info->var.xres = video[KPARAM_WIDTH];
305 fb_info->var.yres_virtual = fb_info->var.yres = info->page->height; 423 fb_info->var.yres_virtual = fb_info->var.yres = video[KPARAM_HEIGHT];
306 fb_info->var.bits_per_pixel = info->page->depth; 424 fb_info->var.bits_per_pixel = XENFB_DEPTH;
307 425
308 fb_info->var.red = (struct fb_bitfield){16, 8, 0}; 426 fb_info->var.red = (struct fb_bitfield){16, 8, 0};
309 fb_info->var.green = (struct fb_bitfield){8, 8, 0}; 427 fb_info->var.green = (struct fb_bitfield){8, 8, 0};
@@ -315,9 +433,9 @@ static int __devinit xenfb_probe(struct xenbus_device *dev,
315 fb_info->var.vmode = FB_VMODE_NONINTERLACED; 433 fb_info->var.vmode = FB_VMODE_NONINTERLACED;
316 434
317 fb_info->fix.visual = FB_VISUAL_TRUECOLOR; 435 fb_info->fix.visual = FB_VISUAL_TRUECOLOR;
318 fb_info->fix.line_length = info->page->line_length; 436 fb_info->fix.line_length = fb_info->var.xres * XENFB_DEPTH / 8;
319 fb_info->fix.smem_start = 0; 437 fb_info->fix.smem_start = 0;
320 fb_info->fix.smem_len = xenfb_mem_len; 438 fb_info->fix.smem_len = fb_size;
321 strcpy(fb_info->fix.id, "xen"); 439 strcpy(fb_info->fix.id, "xen");
322 fb_info->fix.type = FB_TYPE_PACKED_PIXELS; 440 fb_info->fix.type = FB_TYPE_PACKED_PIXELS;
323 fb_info->fix.accel = FB_ACCEL_NONE; 441 fb_info->fix.accel = FB_ACCEL_NONE;
@@ -334,6 +452,8 @@ static int __devinit xenfb_probe(struct xenbus_device *dev,
334 fb_info->fbdefio = &xenfb_defio; 452 fb_info->fbdefio = &xenfb_defio;
335 fb_deferred_io_init(fb_info); 453 fb_deferred_io_init(fb_info);
336 454
455 xenfb_init_shared_page(info, fb_info);
456
337 ret = register_framebuffer(fb_info); 457 ret = register_framebuffer(fb_info);
338 if (ret) { 458 if (ret) {
339 fb_deferred_io_cleanup(fb_info); 459 fb_deferred_io_cleanup(fb_info);
@@ -348,6 +468,7 @@ static int __devinit xenfb_probe(struct xenbus_device *dev,
348 if (ret < 0) 468 if (ret < 0)
349 goto error; 469 goto error;
350 470
471 xenfb_make_preferred_console();
351 return 0; 472 return 0;
352 473
353 error_nomem: 474 error_nomem:
@@ -358,12 +479,34 @@ static int __devinit xenfb_probe(struct xenbus_device *dev,
358 return ret; 479 return ret;
359} 480}
360 481
482static __devinit void
483xenfb_make_preferred_console(void)
484{
485 struct console *c;
486
487 if (console_set_on_cmdline)
488 return;
489
490 acquire_console_sem();
491 for (c = console_drivers; c; c = c->next) {
492 if (!strcmp(c->name, "tty") && c->index == 0)
493 break;
494 }
495 release_console_sem();
496 if (c) {
497 unregister_console(c);
498 c->flags |= CON_CONSDEV;
499 c->flags &= ~CON_PRINTBUFFER; /* don't print again */
500 register_console(c);
501 }
502}
503
361static int xenfb_resume(struct xenbus_device *dev) 504static int xenfb_resume(struct xenbus_device *dev)
362{ 505{
363 struct xenfb_info *info = dev->dev.driver_data; 506 struct xenfb_info *info = dev->dev.driver_data;
364 507
365 xenfb_disconnect_backend(info); 508 xenfb_disconnect_backend(info);
366 xenfb_init_shared_page(info); 509 xenfb_init_shared_page(info, info->fb_info);
367 return xenfb_connect_backend(dev, info); 510 return xenfb_connect_backend(dev, info);
368} 511}
369 512
@@ -391,20 +534,23 @@ static unsigned long vmalloc_to_mfn(void *address)
391 return pfn_to_mfn(vmalloc_to_pfn(address)); 534 return pfn_to_mfn(vmalloc_to_pfn(address));
392} 535}
393 536
394static void xenfb_init_shared_page(struct xenfb_info *info) 537static void xenfb_init_shared_page(struct xenfb_info *info,
538 struct fb_info *fb_info)
395{ 539{
396 int i; 540 int i;
541 int epd = PAGE_SIZE / sizeof(info->mfns[0]);
397 542
398 for (i = 0; i < info->nr_pages; i++) 543 for (i = 0; i < info->nr_pages; i++)
399 info->mfns[i] = vmalloc_to_mfn(info->fb + i * PAGE_SIZE); 544 info->mfns[i] = vmalloc_to_mfn(info->fb + i * PAGE_SIZE);
400 545
401 info->page->pd[0] = vmalloc_to_mfn(info->mfns); 546 for (i = 0; i * epd < info->nr_pages; i++)
402 info->page->pd[1] = 0; 547 info->page->pd[i] = vmalloc_to_mfn(&info->mfns[i * epd]);
403 info->page->width = XENFB_WIDTH; 548
404 info->page->height = XENFB_HEIGHT; 549 info->page->width = fb_info->var.xres;
405 info->page->depth = XENFB_DEPTH; 550 info->page->height = fb_info->var.yres;
406 info->page->line_length = (info->page->depth / 8) * info->page->width; 551 info->page->depth = fb_info->var.bits_per_pixel;
407 info->page->mem_length = xenfb_mem_len; 552 info->page->line_length = fb_info->fix.line_length;
553 info->page->mem_length = fb_info->fix.smem_len;
408 info->page->in_cons = info->page->in_prod = 0; 554 info->page->in_cons = info->page->in_prod = 0;
409 info->page->out_cons = info->page->out_prod = 0; 555 info->page->out_cons = info->page->out_prod = 0;
410} 556}
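
The rewritten loop above sizes the page directory to the framebuffer instead of hard-coding two entries: each directory page holds PAGE_SIZE / sizeof(info->mfns[0]) entries (epd), so pd[i] covers the i-th epd-sized chunk of the mfn array. The arithmetic, checked standalone for a 64-bit build (4 KiB pages and 8-byte entries assumed):

#include <stdio.h>

int main(void)
{
    unsigned long page_size = 4096;
    unsigned long entry_size = sizeof(unsigned long);  /* 8 on 64-bit */
    unsigned long fb_bytes = 32UL << 20;               /* 32 MiB framebuffer */

    unsigned long nr_pages = (fb_bytes + page_size - 1) / page_size;
    unsigned long epd = page_size / entry_size;        /* mfns per pd page */
    unsigned long pd_entries = (nr_pages + epd - 1) / epd;

    printf("%lu fb pages -> %lu pd entries of %lu mfns each\n",
           nr_pages, pd_entries, epd);                 /* 8192 -> 16 x 512 */
    return 0;
}

With the old pd[0]-only initialization, a 64-bit build could reach at most one directory page of mfns (512 pages, i.e. 2 MiB), too little for the larger modes the resize support allows.
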
@@ -504,6 +650,11 @@ InitWait:
504 val = 0; 650 val = 0;
505 if (val) 651 if (val)
506 info->update_wanted = 1; 652 info->update_wanted = 1;
653
654 if (xenbus_scanf(XBT_NIL, dev->otherend,
655 "feature-resize", "%d", &val) < 0)
656 val = 0;
657 info->feature_resize = val;
507 break; 658 break;
508 659
509 case XenbusStateClosing: 660 case XenbusStateClosing:
@@ -547,4 +698,6 @@ static void __exit xenfb_cleanup(void)
547module_init(xenfb_init); 698module_init(xenfb_init);
548module_exit(xenfb_cleanup); 699module_exit(xenfb_cleanup);
549 700
701MODULE_DESCRIPTION("Xen virtual framebuffer device frontend");
550MODULE_LICENSE("GPL"); 702MODULE_LICENSE("GPL");
703MODULE_ALIAS("xen:vfb");
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index 37af04f1ffd9..363286c54290 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -1,4 +1,4 @@
1obj-y += grant-table.o features.o events.o 1obj-y += grant-table.o features.o events.o manage.o
2obj-y += xenbus/ 2obj-y += xenbus/
3obj-$(CONFIG_XEN_XENCOMM) += xencomm.o 3obj-$(CONFIG_XEN_XENCOMM) += xencomm.o
4obj-$(CONFIG_XEN_BALLOON) += balloon.o 4obj-$(CONFIG_XEN_BALLOON) += balloon.o
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index ab25ba6cbbb9..591bc29b55f5 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -225,7 +225,7 @@ static int increase_reservation(unsigned long nr_pages)
225 page = balloon_next_page(page); 225 page = balloon_next_page(page);
226 } 226 }
227 227
228 reservation.extent_start = (unsigned long)frame_list; 228 set_xen_guest_handle(reservation.extent_start, frame_list);
229 reservation.nr_extents = nr_pages; 229 reservation.nr_extents = nr_pages;
230 rc = HYPERVISOR_memory_op( 230 rc = HYPERVISOR_memory_op(
231 XENMEM_populate_physmap, &reservation); 231 XENMEM_populate_physmap, &reservation);
@@ -321,7 +321,7 @@ static int decrease_reservation(unsigned long nr_pages)
321 balloon_append(pfn_to_page(pfn)); 321 balloon_append(pfn_to_page(pfn));
322 } 322 }
323 323
324 reservation.extent_start = (unsigned long)frame_list; 324 set_xen_guest_handle(reservation.extent_start, frame_list);
325 reservation.nr_extents = nr_pages; 325 reservation.nr_extents = nr_pages;
326 ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation); 326 ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
327 BUG_ON(ret != nr_pages); 327 BUG_ON(ret != nr_pages);
@@ -368,7 +368,7 @@ static void balloon_process(struct work_struct *work)
368} 368}
369 369
370/* Resets the Xen limit, sets new target, and kicks off processing. */ 370/* Resets the Xen limit, sets new target, and kicks off processing. */
371void balloon_set_new_target(unsigned long target) 371static void balloon_set_new_target(unsigned long target)
372{ 372{
373 /* No need for lock. Not read-modify-write updates. */ 373 /* No need for lock. Not read-modify-write updates. */
374 balloon_stats.hard_limit = ~0UL; 374 balloon_stats.hard_limit = ~0UL;
@@ -483,7 +483,7 @@ static int dealloc_pte_fn(
483 .extent_order = 0, 483 .extent_order = 0,
484 .domid = DOMID_SELF 484 .domid = DOMID_SELF
485 }; 485 };
486 reservation.extent_start = (unsigned long)&mfn; 486 set_xen_guest_handle(reservation.extent_start, &mfn);
487 set_pte_at(&init_mm, addr, pte, __pte_ma(0ull)); 487 set_pte_at(&init_mm, addr, pte, __pte_ma(0ull));
488 set_phys_to_machine(__pa(addr) >> PAGE_SHIFT, INVALID_P2M_ENTRY); 488 set_phys_to_machine(__pa(addr) >> PAGE_SHIFT, INVALID_P2M_ENTRY);
489 ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation); 489 ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
@@ -519,7 +519,7 @@ static struct page **alloc_empty_pages_and_pagevec(int nr_pages)
519 .extent_order = 0, 519 .extent_order = 0,
520 .domid = DOMID_SELF 520 .domid = DOMID_SELF
521 }; 521 };
522 reservation.extent_start = (unsigned long)&gmfn; 522 set_xen_guest_handle(reservation.extent_start, &gmfn);
523 ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, 523 ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
524 &reservation); 524 &reservation);
525 if (ret == 1) 525 if (ret == 1)
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 76e5b7386af9..332dd63750a0 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -355,7 +355,7 @@ static void unbind_from_irq(unsigned int irq)
355 355
356 spin_lock(&irq_mapping_update_lock); 356 spin_lock(&irq_mapping_update_lock);
357 357
358 if (VALID_EVTCHN(evtchn) && (--irq_bindcount[irq] == 0)) { 358 if ((--irq_bindcount[irq] == 0) && VALID_EVTCHN(evtchn)) {
359 close.port = evtchn; 359 close.port = evtchn;
360 if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0) 360 if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
361 BUG(); 361 BUG();
@@ -375,7 +375,7 @@ static void unbind_from_irq(unsigned int irq)
375 evtchn_to_irq[evtchn] = -1; 375 evtchn_to_irq[evtchn] = -1;
376 irq_info[irq] = IRQ_UNBOUND; 376 irq_info[irq] = IRQ_UNBOUND;
377 377
378 dynamic_irq_init(irq); 378 dynamic_irq_cleanup(irq);
379 } 379 }
380 380
381 spin_unlock(&irq_mapping_update_lock); 381 spin_unlock(&irq_mapping_update_lock);
@@ -557,6 +557,33 @@ out:
557 put_cpu(); 557 put_cpu();
558} 558}
559 559
560/* Rebind a new event channel to an existing irq. */
561void rebind_evtchn_irq(int evtchn, int irq)
562{
563 /* Make sure the irq is masked, since the new event channel
564 will also be masked. */
565 disable_irq(irq);
566
567 spin_lock(&irq_mapping_update_lock);
568
569 /* After resume the irq<->evtchn mappings are all cleared out */
570 BUG_ON(evtchn_to_irq[evtchn] != -1);
571 /* Expect irq to have been bound before,
572 so the bindcount should be non-0 */
573 BUG_ON(irq_bindcount[irq] == 0);
574
575 evtchn_to_irq[evtchn] = irq;
576 irq_info[irq] = mk_irq_info(IRQT_EVTCHN, 0, evtchn);
577
578 spin_unlock(&irq_mapping_update_lock);
579
580 /* new event channels are always bound to cpu 0 */
581 irq_set_affinity(irq, cpumask_of_cpu(0));
582
583 /* Unmask the event channel. */
584 enable_irq(irq);
585}
586
560/* Rebind an evtchn so that it gets delivered to a specific cpu */ 587/* Rebind an evtchn so that it gets delivered to a specific cpu */
561static void rebind_irq_to_cpu(unsigned irq, unsigned tcpu) 588static void rebind_irq_to_cpu(unsigned irq, unsigned tcpu)
562{ 589{
@@ -647,6 +674,89 @@ static int retrigger_dynirq(unsigned int irq)
647 return ret; 674 return ret;
648} 675}
649 676
677static void restore_cpu_virqs(unsigned int cpu)
678{
679 struct evtchn_bind_virq bind_virq;
680 int virq, irq, evtchn;
681
682 for (virq = 0; virq < NR_VIRQS; virq++) {
683 if ((irq = per_cpu(virq_to_irq, cpu)[virq]) == -1)
684 continue;
685
686 BUG_ON(irq_info[irq].type != IRQT_VIRQ);
687 BUG_ON(irq_info[irq].index != virq);
688
689 /* Get a new binding from Xen. */
690 bind_virq.virq = virq;
691 bind_virq.vcpu = cpu;
692 if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
693 &bind_virq) != 0)
694 BUG();
695 evtchn = bind_virq.port;
696
697 /* Record the new mapping. */
698 evtchn_to_irq[evtchn] = irq;
699 irq_info[irq] = mk_irq_info(IRQT_VIRQ, virq, evtchn);
700 bind_evtchn_to_cpu(evtchn, cpu);
701
702 /* Ready for use. */
703 unmask_evtchn(evtchn);
704 }
705}
706
707static void restore_cpu_ipis(unsigned int cpu)
708{
709 struct evtchn_bind_ipi bind_ipi;
710 int ipi, irq, evtchn;
711
712 for (ipi = 0; ipi < XEN_NR_IPIS; ipi++) {
713 if ((irq = per_cpu(ipi_to_irq, cpu)[ipi]) == -1)
714 continue;
715
716 BUG_ON(irq_info[irq].type != IRQT_IPI);
717 BUG_ON(irq_info[irq].index != ipi);
718
719 /* Get a new binding from Xen. */
720 bind_ipi.vcpu = cpu;
721 if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi,
722 &bind_ipi) != 0)
723 BUG();
724 evtchn = bind_ipi.port;
725
726 /* Record the new mapping. */
727 evtchn_to_irq[evtchn] = irq;
728 irq_info[irq] = mk_irq_info(IRQT_IPI, ipi, evtchn);
729 bind_evtchn_to_cpu(evtchn, cpu);
730
731 /* Ready for use. */
732 unmask_evtchn(evtchn);
733
734 }
735}
736
737void xen_irq_resume(void)
738{
739 unsigned int cpu, irq, evtchn;
740
741 init_evtchn_cpu_bindings();
742
743 /* New event-channel space is not 'live' yet. */
744 for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++)
745 mask_evtchn(evtchn);
746
747 /* No IRQ <-> event-channel mappings. */
748 for (irq = 0; irq < NR_IRQS; irq++)
749 irq_info[irq].evtchn = 0; /* zap event-channel binding */
750
751 for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++)
752 evtchn_to_irq[evtchn] = -1;
753
754 for_each_possible_cpu(cpu) {
755 restore_cpu_virqs(cpu);
756 restore_cpu_ipis(cpu);
757 }
758}
759
650static struct irq_chip xen_dynamic_chip __read_mostly = { 760static struct irq_chip xen_dynamic_chip __read_mostly = {
651 .name = "xen-dyn", 761 .name = "xen-dyn",
652 .mask = disable_dynirq, 762 .mask = disable_dynirq,
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index 52b6b41b909d..e9e11168616a 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -471,14 +471,14 @@ static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
471 return 0; 471 return 0;
472} 472}
473 473
474static int gnttab_resume(void) 474int gnttab_resume(void)
475{ 475{
476 if (max_nr_grant_frames() < nr_grant_frames) 476 if (max_nr_grant_frames() < nr_grant_frames)
477 return -ENOSYS; 477 return -ENOSYS;
478 return gnttab_map(0, nr_grant_frames - 1); 478 return gnttab_map(0, nr_grant_frames - 1);
479} 479}
480 480
481static int gnttab_suspend(void) 481int gnttab_suspend(void)
482{ 482{
483 arch_gnttab_unmap_shared(shared, nr_grant_frames); 483 arch_gnttab_unmap_shared(shared, nr_grant_frames);
484 return 0; 484 return 0;
diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c
new file mode 100644
index 000000000000..5b546e365f00
--- /dev/null
+++ b/drivers/xen/manage.c
@@ -0,0 +1,252 @@
1/*
2 * Handle external requests for shutdown, reboot and sysrq
3 */
4#include <linux/kernel.h>
5#include <linux/err.h>
6#include <linux/reboot.h>
7#include <linux/sysrq.h>
8#include <linux/stop_machine.h>
9#include <linux/freezer.h>
10
11#include <xen/xenbus.h>
12#include <xen/grant_table.h>
13#include <xen/events.h>
14#include <xen/hvc-console.h>
15#include <xen/xen-ops.h>
16
17#include <asm/xen/hypercall.h>
18#include <asm/xen/page.h>
19
20enum shutdown_state {
21 SHUTDOWN_INVALID = -1,
22 SHUTDOWN_POWEROFF = 0,
23 SHUTDOWN_SUSPEND = 2,
24 /* Code 3 is SHUTDOWN_CRASH, which we don't use because the domain can only
25 report a crash, not be instructed to crash!
26 HALT is the same as POWEROFF, as far as we're concerned. The tools use
27 the distinction when we return the reason code to them. */
28 SHUTDOWN_HALT = 4,
29};
30
31/* Ignore multiple shutdown requests. */
32static enum shutdown_state shutting_down = SHUTDOWN_INVALID;
33
34#ifdef CONFIG_PM_SLEEP
35static int xen_suspend(void *data)
36{
37 int *cancelled = data;
38 int err;
39
40 BUG_ON(!irqs_disabled());
41
42 load_cr3(swapper_pg_dir);
43
44 err = device_power_down(PMSG_SUSPEND);
45 if (err) {
46 printk(KERN_ERR "xen_suspend: device_power_down failed: %d\n",
47 err);
48 return err;
49 }
50
51 xen_mm_pin_all();
52 gnttab_suspend();
53 xen_pre_suspend();
54
55 /*
56 * This hypercall returns 1 if suspend was cancelled
57 * or the domain was merely checkpointed, and 0 if it
58 * is resuming in a new domain.
59 */
60 *cancelled = HYPERVISOR_suspend(virt_to_mfn(xen_start_info));
61
62 xen_post_suspend(*cancelled);
63 gnttab_resume();
64 xen_mm_unpin_all();
65
66 device_power_up();
67
68 if (!*cancelled) {
69 xen_irq_resume();
70 xen_console_resume();
71 }
72
73 return 0;
74}
75
76static void do_suspend(void)
77{
78 int err;
79 int cancelled = 1;
80
81 shutting_down = SHUTDOWN_SUSPEND;
82
83#ifdef CONFIG_PREEMPT
84 /* If the kernel is preemptible, we need to freeze all the processes
85 to prevent them from being in the middle of a pagetable update
86 during suspend. */
87 err = freeze_processes();
88 if (err) {
89 printk(KERN_ERR "xen suspend: freeze failed %d\n", err);
90 return;
91 }
92#endif
93
94 err = device_suspend(PMSG_SUSPEND);
95 if (err) {
96 printk(KERN_ERR "xen suspend: device_suspend %d\n", err);
97 goto out;
98 }
99
100 printk("suspending xenbus...\n");
101 /* XXX use normal device tree? */
102 xenbus_suspend();
103
104 err = stop_machine_run(xen_suspend, &cancelled, 0);
105 if (err) {
106 printk(KERN_ERR "failed to start xen_suspend: %d\n", err);
107 goto out;
108 }
109
110 if (!cancelled)
111 xenbus_resume();
112 else
113 xenbus_suspend_cancel();
114
115 device_resume();
116
117 /* Make sure timer events get retriggered on all CPUs */
118 clock_was_set();
119out:
120#ifdef CONFIG_PREEMPT
121 thaw_processes();
122#endif
123 shutting_down = SHUTDOWN_INVALID;
124}
125#endif /* CONFIG_PM_SLEEP */
126
127static void shutdown_handler(struct xenbus_watch *watch,
128 const char **vec, unsigned int len)
129{
130 char *str;
131 struct xenbus_transaction xbt;
132 int err;
133
134 if (shutting_down != SHUTDOWN_INVALID)
135 return;
136
137 again:
138 err = xenbus_transaction_start(&xbt);
139 if (err)
140 return;
141
142 str = (char *)xenbus_read(xbt, "control", "shutdown", NULL);
143 /* Ignore read errors and empty reads. */
144 if (XENBUS_IS_ERR_READ(str)) {
145 xenbus_transaction_end(xbt, 1);
146 return;
147 }
148
149 xenbus_write(xbt, "control", "shutdown", "");
150
151 err = xenbus_transaction_end(xbt, 0);
152 if (err == -EAGAIN) {
153 kfree(str);
154 goto again;
155 }
156
157 if (strcmp(str, "poweroff") == 0 ||
158 strcmp(str, "halt") == 0) {
159 shutting_down = SHUTDOWN_POWEROFF;
160 orderly_poweroff(false);
161 } else if (strcmp(str, "reboot") == 0) {
162 shutting_down = SHUTDOWN_POWEROFF; /* ? */
163 ctrl_alt_del();
164#ifdef CONFIG_PM_SLEEP
165 } else if (strcmp(str, "suspend") == 0) {
166 do_suspend();
167#endif
168 } else {
169 printk(KERN_INFO "Ignoring shutdown request: %s\n", str);
170 shutting_down = SHUTDOWN_INVALID;
171 }
172
173 kfree(str);
174}
175
176static void sysrq_handler(struct xenbus_watch *watch, const char **vec,
177 unsigned int len)
178{
179 char sysrq_key = '\0';
180 struct xenbus_transaction xbt;
181 int err;
182
183 again:
184 err = xenbus_transaction_start(&xbt);
185 if (err)
186 return;
187 if (!xenbus_scanf(xbt, "control", "sysrq", "%c", &sysrq_key)) {
188 printk(KERN_ERR "Unable to read sysrq code in "
189 "control/sysrq\n");
190 xenbus_transaction_end(xbt, 1);
191 return;
192 }
193
194 if (sysrq_key != '\0')
195 xenbus_printf(xbt, "control", "sysrq", "%c", '\0');
196
197 err = xenbus_transaction_end(xbt, 0);
198 if (err == -EAGAIN)
199 goto again;
200
201 if (sysrq_key != '\0')
202 handle_sysrq(sysrq_key, NULL);
203}
204
205static struct xenbus_watch shutdown_watch = {
206 .node = "control/shutdown",
207 .callback = shutdown_handler
208};
209
210static struct xenbus_watch sysrq_watch = {
211 .node = "control/sysrq",
212 .callback = sysrq_handler
213};
214
215static int setup_shutdown_watcher(void)
216{
217 int err;
218
219 err = register_xenbus_watch(&shutdown_watch);
220 if (err) {
221 printk(KERN_ERR "Failed to set shutdown watcher\n");
222 return err;
223 }
224
225 err = register_xenbus_watch(&sysrq_watch);
226 if (err) {
227 printk(KERN_ERR "Failed to set sysrq watcher\n");
228 return err;
229 }
230
231 return 0;
232}
233
234static int shutdown_event(struct notifier_block *notifier,
235 unsigned long event,
236 void *data)
237{
238 setup_shutdown_watcher();
239 return NOTIFY_DONE;
240}
241
242static int __init setup_shutdown_event(void)
243{
244 static struct notifier_block xenstore_notifier = {
245 .notifier_call = shutdown_event
246 };
247 register_xenstore_notifier(&xenstore_notifier);
248
249 return 0;
250}
251
252subsys_initcall(setup_shutdown_event);
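
Both watch handlers above follow the standard xenbus transaction idiom: start a transaction, read and consume the control key inside it, then commit, retrying from the top when the commit returns -EAGAIN because the store changed underneath. The skeleton of the pattern, using only calls that appear in this file (in-tree code; the control/example key is hypothetical):

/* Sketch of the xenbus read-and-clear transaction pattern. */
static void read_and_clear_key(void)
{
	struct xenbus_transaction xbt;
	char *str;
	int err;

again:
	err = xenbus_transaction_start(&xbt);
	if (err)
		return;

	str = (char *)xenbus_read(xbt, "control", "example", NULL);
	if (XENBUS_IS_ERR_READ(str)) {
		xenbus_transaction_end(xbt, 1);  /* abort on bad read */
		return;
	}
	xenbus_write(xbt, "control", "example", "");  /* consume the value */

	err = xenbus_transaction_end(xbt, 0);  /* attempt to commit */
	if (err == -EAGAIN) {
		kfree(str);
		goto again;  /* store changed under us; retry */
	}

	/* ... act on the committed value in str ... */
	kfree(str);
}
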
diff --git a/drivers/xen/xenbus/xenbus_comms.c b/drivers/xen/xenbus/xenbus_comms.c
index 6efbe3f29ca5..090c61ee8fd0 100644
--- a/drivers/xen/xenbus/xenbus_comms.c
+++ b/drivers/xen/xenbus/xenbus_comms.c
@@ -203,7 +203,6 @@ int xb_read(void *data, unsigned len)
203int xb_init_comms(void) 203int xb_init_comms(void)
204{ 204{
205 struct xenstore_domain_interface *intf = xen_store_interface; 205 struct xenstore_domain_interface *intf = xen_store_interface;
206 int err;
207 206
208 if (intf->req_prod != intf->req_cons) 207 if (intf->req_prod != intf->req_cons)
209 printk(KERN_ERR "XENBUS request ring is not quiescent " 208 printk(KERN_ERR "XENBUS request ring is not quiescent "
@@ -216,18 +215,20 @@ int xb_init_comms(void)
216 intf->rsp_cons = intf->rsp_prod; 215 intf->rsp_cons = intf->rsp_prod;
217 } 216 }
218 217
219 if (xenbus_irq) 218 if (xenbus_irq) {
220 unbind_from_irqhandler(xenbus_irq, &xb_waitq); 219 /* Already have an irq; assume we're resuming */
220 rebind_evtchn_irq(xen_store_evtchn, xenbus_irq);
221 } else {
222 int err;
223 err = bind_evtchn_to_irqhandler(xen_store_evtchn, wake_waiting,
224 0, "xenbus", &xb_waitq);
225 if (err <= 0) {
226 printk(KERN_ERR "XENBUS request irq failed %i\n", err);
227 return err;
228 }
221 229
222 err = bind_evtchn_to_irqhandler( 230 xenbus_irq = err;
223 xen_store_evtchn, wake_waiting,
224 0, "xenbus", &xb_waitq);
225 if (err <= 0) {
226 printk(KERN_ERR "XENBUS request irq failed %i\n", err);
227 return err;
228 } 231 }
229 232
230 xenbus_irq = err;
231
232 return 0; 233 return 0;
233} 234}
diff --git a/fs/Kconfig b/fs/Kconfig
index 2694648cbd1b..313b2e06ded5 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -930,7 +930,7 @@ config PROC_KCORE
930 930
931config PROC_VMCORE 931config PROC_VMCORE
932 bool "/proc/vmcore support (EXPERIMENTAL)" 932 bool "/proc/vmcore support (EXPERIMENTAL)"
933 depends on PROC_FS && EXPERIMENTAL && CRASH_DUMP 933 depends on PROC_FS && CRASH_DUMP
934 default y 934 default y
935 help 935 help
936 Exports the dump image of crashed kernel in ELF format. 936 Exports the dump image of crashed kernel in ELF format.
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index 7e277f2ad466..c652d469dc08 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -123,6 +123,11 @@ static int uptime_read_proc(char *page, char **start, off_t off,
123 return proc_calc_metrics(page, start, off, count, eof, len); 123 return proc_calc_metrics(page, start, off, count, eof, len);
124} 124}
125 125
126int __attribute__((weak)) arch_report_meminfo(char *page)
127{
128 return 0;
129}
130
126static int meminfo_read_proc(char *page, char **start, off_t off, 131static int meminfo_read_proc(char *page, char **start, off_t off,
127 int count, int *eof, void *data) 132 int count, int *eof, void *data)
128{ 133{
@@ -221,6 +226,8 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
221 226
222 len += hugetlb_report_meminfo(page + len); 227 len += hugetlb_report_meminfo(page + len);
223 228
229 len += arch_report_meminfo(page + len);
230
224 return proc_calc_metrics(page, start, off, count, eof, len); 231 return proc_calc_metrics(page, start, off, count, eof, len);
225#undef K 232#undef K
226} 233}
@@ -472,6 +479,13 @@ static const struct file_operations proc_vmalloc_operations = {
472}; 479};
473#endif 480#endif
474 481
482#ifndef arch_irq_stat_cpu
483#define arch_irq_stat_cpu(cpu) 0
484#endif
485#ifndef arch_irq_stat
486#define arch_irq_stat() 0
487#endif
488
475static int show_stat(struct seq_file *p, void *v) 489static int show_stat(struct seq_file *p, void *v)
476{ 490{
477 int i; 491 int i;
@@ -509,7 +523,9 @@ static int show_stat(struct seq_file *p, void *v)
509 sum += temp; 523 sum += temp;
510 per_irq_sum[j] += temp; 524 per_irq_sum[j] += temp;
511 } 525 }
526 sum += arch_irq_stat_cpu(i);
512 } 527 }
528 sum += arch_irq_stat();
513 529
514 seq_printf(p, "cpu %llu %llu %llu %llu %llu %llu %llu %llu %llu\n", 530 seq_printf(p, "cpu %llu %llu %llu %llu %llu %llu %llu %llu %llu\n",
515 (unsigned long long)cputime64_to_clock_t(user), 531 (unsigned long long)cputime64_to_clock_t(user),
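
The proc changes use two flavors of optional architecture hook: a weak function (arch_report_meminfo) that any strong per-arch definition overrides at link time, and #ifndef'd macros (arch_irq_stat_cpu, arch_irq_stat) that fall back to 0 unless the arch header defined them first. The weak-symbol half in miniature (a GCC extension; the demo name is invented, and a second object file with a non-weak definition would win):

#include <stdio.h>

/* Default implementation; a non-weak definition elsewhere replaces it. */
int __attribute__((weak)) arch_report_meminfo_demo(char *page)
{
    return 0;  /* contributes nothing to the report */
}

int main(void)
{
    char page[64];
    printf("arch contributed %d bytes\n", arch_report_meminfo_demo(page));
    return 0;
}
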
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index 44ef329531c3..4fce3db2cecc 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -197,6 +197,63 @@ static inline int pmd_none_or_clear_bad(pmd_t *pmd)
197} 197}
198#endif /* CONFIG_MMU */ 198#endif /* CONFIG_MMU */
199 199
200static inline pte_t __ptep_modify_prot_start(struct mm_struct *mm,
201 unsigned long addr,
202 pte_t *ptep)
203{
204 /*
205 * Get the current pte state, but zero it out to make it
206 * non-present, preventing the hardware from asynchronously
207 * updating it.
208 */
209 return ptep_get_and_clear(mm, addr, ptep);
210}
211
212static inline void __ptep_modify_prot_commit(struct mm_struct *mm,
213 unsigned long addr,
214 pte_t *ptep, pte_t pte)
215{
216 /*
217 * The pte is non-present, so there's no hardware state to
218 * preserve.
219 */
220 set_pte_at(mm, addr, ptep, pte);
221}
222
223#ifndef __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
224/*
225 * Start a pte protection read-modify-write transaction, which
226 * protects against asynchronous hardware modifications to the pte.
227 * The intention is not to prevent the hardware from making pte
228 * updates, but to prevent any updates it may make from being lost.
229 *
230 * This does not protect against other software modifications of the
231 * pte; the appropriate pte lock must be held over the transaction.
232 *
233 * Note that this interface is intended to be batchable, meaning that
234 * ptep_modify_prot_commit may not actually update the pte, but merely
235 * queue the update to be done at some later time. The update must be
236 * actually committed before the pte lock is released, however.
237 */
238static inline pte_t ptep_modify_prot_start(struct mm_struct *mm,
239 unsigned long addr,
240 pte_t *ptep)
241{
242 return __ptep_modify_prot_start(mm, addr, ptep);
243}
244
245/*
246 * Commit an update to a pte, leaving any hardware-controlled bits in
247 * the PTE unmodified.
248 */
249static inline void ptep_modify_prot_commit(struct mm_struct *mm,
250 unsigned long addr,
251 pte_t *ptep, pte_t pte)
252{
253 __ptep_modify_prot_commit(mm, addr, ptep, pte);
254}
255#endif /* __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION */
256
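
As the comment block above spells out, the start/commit pair brackets a pte read-modify-write so hardware A/D-bit updates cannot be lost in between: start returns the pte while making it non-present, commit installs the recomputed entry, and a paravirt implementation may batch the commit. A sketch of the intended caller shape, assuming the pte lock is already held (kernel-only code, shown for shape rather than compilation):

/* Sketch: change protections on one pte while racing hardware updates.
 * Assumes the caller holds the pte lock, per the comment above. */
static void change_one_pte(struct mm_struct *mm, unsigned long addr,
			   pte_t *ptep, pgprot_t newprot)
{
	pte_t ptent;

	ptent = ptep_modify_prot_start(mm, addr, ptep);  /* pte now non-present */
	ptent = pte_modify(ptent, newprot);              /* recompute protections */
	ptep_modify_prot_commit(mm, addr, ptep, ptent);  /* may be batched */
}
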
200/* 257/*
201 * A facility to provide lazy MMU batching. This allows PTE updates and 258 * A facility to provide lazy MMU batching. This allows PTE updates and
202 * page invalidations to be delayed until a call to leave lazy MMU mode 259 * page invalidations to be delayed until a call to leave lazy MMU mode
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index f054778e916c..f1992dc5c424 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -93,6 +93,8 @@
93 VMLINUX_SYMBOL(__end_rio_route_ops) = .; \ 93 VMLINUX_SYMBOL(__end_rio_route_ops) = .; \
94 } \ 94 } \
95 \ 95 \
96 TRACEDATA \
97 \
96 /* Kernel symbol table: Normal symbols */ \ 98 /* Kernel symbol table: Normal symbols */ \
97 __ksymtab : AT(ADDR(__ksymtab) - LOAD_OFFSET) { \ 99 __ksymtab : AT(ADDR(__ksymtab) - LOAD_OFFSET) { \
98 VMLINUX_SYMBOL(__start___ksymtab) = .; \ 100 VMLINUX_SYMBOL(__start___ksymtab) = .; \
@@ -318,6 +320,18 @@
318 __stop___bug_table = .; \ 320 __stop___bug_table = .; \
319 } 321 }
320 322
323#ifdef CONFIG_PM_TRACE
324#define TRACEDATA \
325 . = ALIGN(4); \
326 .tracedata : AT(ADDR(.tracedata) - LOAD_OFFSET) { \
327 __tracedata_start = .; \
328 *(.tracedata) \
329 __tracedata_end = .; \
330 }
331#else
332#define TRACEDATA
333#endif
334
321#define NOTES \ 335#define NOTES \
322 .notes : AT(ADDR(.notes) - LOAD_OFFSET) { \ 336 .notes : AT(ADDR(.notes) - LOAD_OFFSET) { \
323 VMLINUX_SYMBOL(__start_notes) = .; \ 337 VMLINUX_SYMBOL(__start_notes) = .; \
diff --git a/include/asm-x86/acpi.h b/include/asm-x86/acpi.h
index 14411c9de46f..635d764dc13e 100644
--- a/include/asm-x86/acpi.h
+++ b/include/asm-x86/acpi.h
@@ -28,6 +28,7 @@
28#include <asm/numa.h> 28#include <asm/numa.h>
29#include <asm/processor.h> 29#include <asm/processor.h>
30#include <asm/mmu.h> 30#include <asm/mmu.h>
31#include <asm/mpspec.h>
31 32
32#define COMPILER_DEPENDENT_INT64 long long 33#define COMPILER_DEPENDENT_INT64 long long
33#define COMPILER_DEPENDENT_UINT64 unsigned long long 34#define COMPILER_DEPENDENT_UINT64 unsigned long long
@@ -160,9 +161,7 @@ struct bootnode;
160#ifdef CONFIG_ACPI_NUMA 161#ifdef CONFIG_ACPI_NUMA
161extern int acpi_numa; 162extern int acpi_numa;
162extern int acpi_scan_nodes(unsigned long start, unsigned long end); 163extern int acpi_scan_nodes(unsigned long start, unsigned long end);
163#ifdef CONFIG_X86_64 164#define NR_NODE_MEMBLKS (MAX_NUMNODES*2)
164# define NR_NODE_MEMBLKS (MAX_NUMNODES*2)
165#endif
166extern void acpi_fake_nodes(const struct bootnode *fake_nodes, 165extern void acpi_fake_nodes(const struct bootnode *fake_nodes,
167 int num_nodes); 166 int num_nodes);
168#else 167#else
diff --git a/include/asm-x86/amd_iommu.h b/include/asm-x86/amd_iommu.h
new file mode 100644
index 000000000000..30a12049353b
--- /dev/null
+++ b/include/asm-x86/amd_iommu.h
@@ -0,0 +1,32 @@
1/*
2 * Copyright (C) 2007-2008 Advanced Micro Devices, Inc.
3 * Author: Joerg Roedel <joerg.roedel@amd.com>
4 * Leo Duran <leo.duran@amd.com>
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published
8 * by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */
19
20#ifndef _ASM_X86_AMD_IOMMU_H
21#define _ASM_X86_AMD_IOMMU_H
22
23#ifdef CONFIG_AMD_IOMMU
24extern int amd_iommu_init(void);
25extern int amd_iommu_init_dma_ops(void);
26extern void amd_iommu_detect(void);
27#else
28static inline int amd_iommu_init(void) { return -ENODEV; }
29static inline void amd_iommu_detect(void) { }
30#endif
31
32#endif
diff --git a/include/asm-x86/amd_iommu_types.h b/include/asm-x86/amd_iommu_types.h
new file mode 100644
index 000000000000..7bfcb47cc452
--- /dev/null
+++ b/include/asm-x86/amd_iommu_types.h
@@ -0,0 +1,244 @@
1/*
2 * Copyright (C) 2007-2008 Advanced Micro Devices, Inc.
3 * Author: Joerg Roedel <joerg.roedel@amd.com>
4 * Leo Duran <leo.duran@amd.com>
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published
8 * by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */
19
20#ifndef __AMD_IOMMU_TYPES_H__
21#define __AMD_IOMMU_TYPES_H__
22
23#include <linux/types.h>
24#include <linux/list.h>
25#include <linux/spinlock.h>
26
27/*
28 * some size calculation constants
29 */
30#define DEV_TABLE_ENTRY_SIZE 256
31#define ALIAS_TABLE_ENTRY_SIZE 2
32#define RLOOKUP_TABLE_ENTRY_SIZE (sizeof(void *))
33
34/* helper macros */
35#define LOW_U32(x) ((x) & ((1ULL << 32)-1))
36#define HIGH_U32(x) (LOW_U32((x) >> 32))
37
38/* Length of the MMIO region for the AMD IOMMU */
39#define MMIO_REGION_LENGTH 0x4000
40
41/* Capability offsets used by the driver */
42#define MMIO_CAP_HDR_OFFSET 0x00
43#define MMIO_RANGE_OFFSET 0x0c
44
45/* Masks, shifts and macros to parse the device range capability */
46#define MMIO_RANGE_LD_MASK 0xff000000
47#define MMIO_RANGE_FD_MASK 0x00ff0000
48#define MMIO_RANGE_BUS_MASK 0x0000ff00
49#define MMIO_RANGE_LD_SHIFT 24
50#define MMIO_RANGE_FD_SHIFT 16
51#define MMIO_RANGE_BUS_SHIFT 8
52#define MMIO_GET_LD(x) (((x) & MMIO_RANGE_LD_MASK) >> MMIO_RANGE_LD_SHIFT)
53#define MMIO_GET_FD(x) (((x) & MMIO_RANGE_FD_MASK) >> MMIO_RANGE_FD_SHIFT)
54#define MMIO_GET_BUS(x) (((x) & MMIO_RANGE_BUS_MASK) >> MMIO_RANGE_BUS_SHIFT)
55
56/* Flag masks for the AMD IOMMU exclusion range */
57#define MMIO_EXCL_ENABLE_MASK 0x01ULL
58#define MMIO_EXCL_ALLOW_MASK 0x02ULL
59
60/* Used offsets into the MMIO space */
61#define MMIO_DEV_TABLE_OFFSET 0x0000
62#define MMIO_CMD_BUF_OFFSET 0x0008
63#define MMIO_EVT_BUF_OFFSET 0x0010
64#define MMIO_CONTROL_OFFSET 0x0018
65#define MMIO_EXCL_BASE_OFFSET 0x0020
66#define MMIO_EXCL_LIMIT_OFFSET 0x0028
67#define MMIO_CMD_HEAD_OFFSET 0x2000
68#define MMIO_CMD_TAIL_OFFSET 0x2008
69#define MMIO_EVT_HEAD_OFFSET 0x2010
70#define MMIO_EVT_TAIL_OFFSET 0x2018
71#define MMIO_STATUS_OFFSET 0x2020
72
73/* feature control bits */
74#define CONTROL_IOMMU_EN 0x00ULL
75#define CONTROL_HT_TUN_EN 0x01ULL
76#define CONTROL_EVT_LOG_EN 0x02ULL
77#define CONTROL_EVT_INT_EN 0x03ULL
78#define CONTROL_COMWAIT_EN 0x04ULL
79#define CONTROL_PASSPW_EN 0x08ULL
80#define CONTROL_RESPASSPW_EN 0x09ULL
81#define CONTROL_COHERENT_EN 0x0aULL
82#define CONTROL_ISOC_EN 0x0bULL
83#define CONTROL_CMDBUF_EN 0x0cULL
84#define CONTROL_PPFLOG_EN 0x0dULL
85#define CONTROL_PPFINT_EN 0x0eULL
86
87/* command specific defines */
88#define CMD_COMPL_WAIT 0x01
89#define CMD_INV_DEV_ENTRY 0x02
90#define CMD_INV_IOMMU_PAGES 0x03
91
92#define CMD_COMPL_WAIT_STORE_MASK 0x01
93#define CMD_INV_IOMMU_PAGES_SIZE_MASK 0x01
94#define CMD_INV_IOMMU_PAGES_PDE_MASK 0x02
95
96#define CMD_INV_IOMMU_ALL_PAGES_ADDRESS 0x7fffffffffffffffULL
97
98/* macros and definitions for device table entries */
99#define DEV_ENTRY_VALID 0x00
100#define DEV_ENTRY_TRANSLATION 0x01
101#define DEV_ENTRY_IR 0x3d
102#define DEV_ENTRY_IW 0x3e
103#define DEV_ENTRY_EX 0x67
104#define DEV_ENTRY_SYSMGT1 0x68
105#define DEV_ENTRY_SYSMGT2 0x69
106#define DEV_ENTRY_INIT_PASS 0xb8
107#define DEV_ENTRY_EINT_PASS 0xb9
108#define DEV_ENTRY_NMI_PASS 0xba
109#define DEV_ENTRY_LINT0_PASS 0xbe
110#define DEV_ENTRY_LINT1_PASS 0xbf
111
112/* constants to configure the command buffer */
113#define CMD_BUFFER_SIZE 8192
114#define CMD_BUFFER_ENTRIES 512
115#define MMIO_CMD_SIZE_SHIFT 56
116#define MMIO_CMD_SIZE_512 (0x9ULL << MMIO_CMD_SIZE_SHIFT)
117
118#define PAGE_MODE_1_LEVEL 0x01
119#define PAGE_MODE_2_LEVEL 0x02
120#define PAGE_MODE_3_LEVEL 0x03
121
122#define IOMMU_PDE_NL_0 0x000ULL
123#define IOMMU_PDE_NL_1 0x200ULL
124#define IOMMU_PDE_NL_2 0x400ULL
125#define IOMMU_PDE_NL_3 0x600ULL
126
127#define IOMMU_PTE_L2_INDEX(address) (((address) >> 30) & 0x1ffULL)
128#define IOMMU_PTE_L1_INDEX(address) (((address) >> 21) & 0x1ffULL)
129#define IOMMU_PTE_L0_INDEX(address) (((address) >> 12) & 0x1ffULL)
130
131#define IOMMU_MAP_SIZE_L1 (1ULL << 21)
132#define IOMMU_MAP_SIZE_L2 (1ULL << 30)
133#define IOMMU_MAP_SIZE_L3 (1ULL << 39)
134
135#define IOMMU_PTE_P (1ULL << 0)
136#define IOMMU_PTE_U (1ULL << 59)
137#define IOMMU_PTE_FC (1ULL << 60)
138#define IOMMU_PTE_IR (1ULL << 61)
139#define IOMMU_PTE_IW (1ULL << 62)
140
141#define IOMMU_L1_PDE(address) \
142 ((address) | IOMMU_PDE_NL_1 | IOMMU_PTE_P | IOMMU_PTE_IR | IOMMU_PTE_IW)
143#define IOMMU_L2_PDE(address) \
144 ((address) | IOMMU_PDE_NL_2 | IOMMU_PTE_P | IOMMU_PTE_IR | IOMMU_PTE_IW)
145
146#define IOMMU_PAGE_MASK (((1ULL << 52) - 1) & ~0xfffULL)
147#define IOMMU_PTE_PRESENT(pte) ((pte) & IOMMU_PTE_P)
148#define IOMMU_PTE_PAGE(pte) (phys_to_virt((pte) & IOMMU_PAGE_MASK))
149#define IOMMU_PTE_MODE(pte) (((pte) >> 9) & 0x07)
150
151#define IOMMU_PROT_MASK 0x03
152#define IOMMU_PROT_IR 0x01
153#define IOMMU_PROT_IW 0x02
154
155/* IOMMU capabilities */
156#define IOMMU_CAP_IOTLB 24
157#define IOMMU_CAP_NPCACHE 26
158
159#define MAX_DOMAIN_ID 65536
160
161struct protection_domain {
162 spinlock_t lock;
163 u16 id;
164 int mode;
165 u64 *pt_root;
166 void *priv;
167};
168
169struct dma_ops_domain {
170 struct list_head list;
171 struct protection_domain domain;
172 unsigned long aperture_size;
173 unsigned long next_bit;
174 unsigned long *bitmap;
175 u64 **pte_pages;
176};
177
178struct amd_iommu {
179 struct list_head list;
180 spinlock_t lock;
181
182 u16 devid;
183 u16 cap_ptr;
184
185 u64 mmio_phys;
186 u8 *mmio_base;
187 u32 cap;
188 u16 first_device;
189 u16 last_device;
190 u64 exclusion_start;
191 u64 exclusion_length;
192
193 u8 *cmd_buf;
194 u32 cmd_buf_size;
195
196 int need_sync;
197
198 struct dma_ops_domain *default_dom;
199};
200
201extern struct list_head amd_iommu_list;
202
203struct dev_table_entry {
204 u32 data[8];
205};
206
207struct unity_map_entry {
208 struct list_head list;
209 u16 devid_start;
210 u16 devid_end;
211 u64 address_start;
212 u64 address_end;
213 int prot;
214};
215
216extern struct list_head amd_iommu_unity_map;
217
218/* data structures for device handling */
219extern struct dev_table_entry *amd_iommu_dev_table;
220extern u16 *amd_iommu_alias_table;
221extern struct amd_iommu **amd_iommu_rlookup_table;
222
223extern unsigned amd_iommu_aperture_order;
224
225extern u16 amd_iommu_last_bdf;
226
227/* data structures for protection domain handling */
228extern struct protection_domain **amd_iommu_pd_table;
229extern unsigned long *amd_iommu_pd_alloc_bitmap;
230
231extern int amd_iommu_isolate;
232
233static inline void print_devid(u16 devid, int nl)
234{
235 int bus = devid >> 8;
236 int dev = devid >> 3 & 0x1f;
237 int fn = devid & 0x07;
238
239 printk("%02x:%02x.%x", bus, dev, fn);
240 if (nl)
241 printk("\n");
242}
243
244#endif
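
print_devid decodes the 16-bit PCI requester ID that indexes all of these tables: bus in bits 15..8, device in bits 7..3, function in bits 2..0. The field arithmetic, checked standalone with an arbitrary example ID:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint16_t devid = 0x0a31;        /* example requester ID */

    int bus = devid >> 8;           /* 0x0a */
    int dev = (devid >> 3) & 0x1f;  /* 6 */
    int fn  = devid & 0x07;         /* 1 */

    printf("%02x:%02x.%x\n", bus, dev, fn);  /* prints 0a:06.1 */
    return 0;
}
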
diff --git a/include/asm-x86/apic.h b/include/asm-x86/apic.h
index be9639a9a186..4e2c1e517f06 100644
--- a/include/asm-x86/apic.h
+++ b/include/asm-x86/apic.h
@@ -36,15 +36,11 @@ extern void generic_apic_probe(void);
36#ifdef CONFIG_X86_LOCAL_APIC 36#ifdef CONFIG_X86_LOCAL_APIC
37 37
38extern int apic_verbosity; 38extern int apic_verbosity;
39extern int timer_over_8254;
40extern int local_apic_timer_c2_ok; 39extern int local_apic_timer_c2_ok;
41extern int local_apic_timer_disabled;
42 40
43extern int apic_runs_main_timer;
44extern int ioapic_force; 41extern int ioapic_force;
45extern int disable_apic;
46extern int disable_apic_timer;
47 42
43extern int disable_apic;
48/* 44/*
49 * Basic functions accessing APICs. 45 * Basic functions accessing APICs.
50 */ 46 */
@@ -125,16 +121,22 @@ extern void enable_NMI_through_LVT0(void);
125 */ 121 */
126#ifdef CONFIG_X86_64 122#ifdef CONFIG_X86_64
127extern void early_init_lapic_mapping(void); 123extern void early_init_lapic_mapping(void);
124extern int apic_is_clustered_box(void);
125#else
126static inline int apic_is_clustered_box(void)
127{
128 return 0;
129}
128#endif 130#endif
129 131
130extern u8 setup_APIC_eilvt_mce(u8 vector, u8 msg_type, u8 mask); 132extern u8 setup_APIC_eilvt_mce(u8 vector, u8 msg_type, u8 mask);
131extern u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask); 133extern u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask);
132 134
133extern int apic_is_clustered_box(void);
134 135
135#else /* !CONFIG_X86_LOCAL_APIC */ 136#else /* !CONFIG_X86_LOCAL_APIC */
136static inline void lapic_shutdown(void) { } 137static inline void lapic_shutdown(void) { }
137#define local_apic_timer_c2_ok 1 138#define local_apic_timer_c2_ok 1
139static inline void init_apic_mappings(void) { }
138 140
139#endif /* !CONFIG_X86_LOCAL_APIC */ 141#endif /* !CONFIG_X86_LOCAL_APIC */
140 142
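
The apic.h shuffle applies the usual config-stub pattern: when the feature is configured out, the header supplies static inline no-ops (apic_is_clustered_box returning 0, init_apic_mappings doing nothing) so call sites need no #ifdef and the compiler discards the dead branches. The pattern in isolation (CONFIG_FEATURE_X and the function name are invented):

#include <stdio.h>

/* #define CONFIG_FEATURE_X 1 */  /* toggle to compare the two builds */

#ifdef CONFIG_FEATURE_X
int feature_x_query(void);  /* real version would live in feature_x.c */
#else
static inline int feature_x_query(void) { return 0; }  /* compiled-out stub */
#endif

int main(void)
{
    /* no #ifdef at the call site; the stub makes this branch dead code */
    if (feature_x_query())
        printf("feature X active\n");
    else
        printf("feature X compiled out\n");
    return 0;
}
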
diff --git a/include/asm-x86/asm.h b/include/asm-x86/asm.h
index 90dec0c23646..97220321f39d 100644
--- a/include/asm-x86/asm.h
+++ b/include/asm-x86/asm.h
@@ -1,37 +1,40 @@
1#ifndef _ASM_X86_ASM_H 1#ifndef _ASM_X86_ASM_H
2#define _ASM_X86_ASM_H 2#define _ASM_X86_ASM_H
3 3
4#ifdef CONFIG_X86_32 4#ifdef __ASSEMBLY__
5/* 32 bits */ 5# define __ASM_FORM(x) x
6 6# define __ASM_EX_SEC .section __ex_table
7# define _ASM_PTR " .long "
8# define _ASM_ALIGN " .balign 4 "
9# define _ASM_MOV_UL " movl "
10
11# define _ASM_INC " incl "
12# define _ASM_DEC " decl "
13# define _ASM_ADD " addl "
14# define _ASM_SUB " subl "
15# define _ASM_XADD " xaddl "
16
17#else 7#else
18/* 64 bits */ 8# define __ASM_FORM(x) " " #x " "
9# define __ASM_EX_SEC " .section __ex_table,\"a\"\n"
10#endif
19 11
20# define _ASM_PTR " .quad " 12#ifdef CONFIG_X86_32
21# define _ASM_ALIGN " .balign 8 " 13# define __ASM_SEL(a,b) __ASM_FORM(a)
22# define _ASM_MOV_UL " movq " 14#else
23 15# define __ASM_SEL(a,b) __ASM_FORM(b)
24# define _ASM_INC " incq " 16#endif
25# define _ASM_DEC " decq " 17
26# define _ASM_ADD " addq " 18#define __ASM_SIZE(inst) __ASM_SEL(inst##l, inst##q)
27# define _ASM_SUB " subq " 19#define __ASM_REG(reg) __ASM_SEL(e##reg, r##reg)
28# define _ASM_XADD " xaddq " 20
29 21#define _ASM_PTR __ASM_SEL(.long, .quad)
30#endif /* CONFIG_X86_32 */ 22#define _ASM_ALIGN __ASM_SEL(.balign 4, .balign 8)
23#define _ASM_MOV_UL __ASM_SIZE(mov)
24
25#define _ASM_INC __ASM_SIZE(inc)
26#define _ASM_DEC __ASM_SIZE(dec)
27#define _ASM_ADD __ASM_SIZE(add)
28#define _ASM_SUB __ASM_SIZE(sub)
29#define _ASM_XADD __ASM_SIZE(xadd)
30#define _ASM_AX __ASM_REG(ax)
31#define _ASM_BX __ASM_REG(bx)
32#define _ASM_CX __ASM_REG(cx)
33#define _ASM_DX __ASM_REG(dx)
31 34
32/* Exception table entry */ 35/* Exception table entry */
33# define _ASM_EXTABLE(from,to) \ 36# define _ASM_EXTABLE(from,to) \
34 " .section __ex_table,\"a\"\n" \ 37 __ASM_EX_SEC \
35 _ASM_ALIGN "\n" \ 38 _ASM_ALIGN "\n" \
36 _ASM_PTR #from "," #to "\n" \ 39 _ASM_PTR #from "," #to "\n" \
37 " .previous\n" 40 " .previous\n"
diff --git a/include/asm-x86/atomic_64.h b/include/asm-x86/atomic_64.h
index 3e0cd7d38335..a0095191c02e 100644
--- a/include/asm-x86/atomic_64.h
+++ b/include/asm-x86/atomic_64.h
@@ -11,12 +11,6 @@
11 * resource counting etc.. 11 * resource counting etc..
12 */ 12 */
13 13
14#ifdef CONFIG_SMP
15#define LOCK "lock ; "
16#else
17#define LOCK ""
18#endif
19
20/* 14/*
21 * Make sure gcc doesn't try to be clever and move things around 15 * Make sure gcc doesn't try to be clever and move things around
22 * on us. We need to use _exactly_ the address the user gave us, 16 * on us. We need to use _exactly_ the address the user gave us,
@@ -431,6 +425,32 @@ static inline int atomic64_add_unless(atomic64_t *v, long a, long u)
431 return c != (u); 425 return c != (u);
432} 426}
433 427
428/**
429 * atomic_inc_short - increment of a short integer
430 * @v: pointer to type short int
431 *
432 * Atomically adds 1 to @v
433 * Returns the new value of @v
434 */
435static inline short int atomic_inc_short(short int *v)
436{
437 asm(LOCK_PREFIX "addw $1, %0" : "+m" (*v));
438 return *v;
439}
440
441/**
442 * atomic_or_long - OR of two long integers
443 * @v1: pointer to type unsigned long
444 * @v2: pointer to type unsigned long
445 *
446 * Atomically ORs @v2 into @v1
447 * The result is stored in @v1; nothing is returned
448 */
449static inline void atomic_or_long(unsigned long *v1, unsigned long v2)
450{
451 asm(LOCK_PREFIX "orq %1, %0" : "+m" (*v1) : "r" (v2));
452}
453
434#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0) 454#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0)
435 455
436/* These are x86-specific, used by some header files */ 456/* These are x86-specific, used by some header files */
diff --git a/include/asm-x86/bios_ebda.h b/include/asm-x86/bios_ebda.h
index b4a46b7be794..0033e50c13b2 100644
--- a/include/asm-x86/bios_ebda.h
+++ b/include/asm-x86/bios_ebda.h
@@ -14,4 +14,6 @@ static inline unsigned int get_bios_ebda(void)
14 return address; /* 0 means none */ 14 return address; /* 0 means none */
15} 15}
16 16
17void reserve_ebda_region(void);
18
17#endif /* _MACH_BIOS_EBDA_H */ 19#endif /* _MACH_BIOS_EBDA_H */
diff --git a/include/asm-x86/bitops.h b/include/asm-x86/bitops.h
index ee4b3ead6a43..96b1829cea15 100644
--- a/include/asm-x86/bitops.h
+++ b/include/asm-x86/bitops.h
@@ -23,11 +23,21 @@
23#if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 1) 23#if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 1)
24/* Technically wrong, but this avoids compilation errors on some gcc 24/* Technically wrong, but this avoids compilation errors on some gcc
25 versions. */ 25 versions. */
26#define ADDR "=m" (*(volatile long *) addr) 26#define BITOP_ADDR(x) "=m" (*(volatile long *) (x))
27#else 27#else
28#define ADDR "+m" (*(volatile long *) addr) 28#define BITOP_ADDR(x) "+m" (*(volatile long *) (x))
29#endif 29#endif
30 30
31#define ADDR BITOP_ADDR(addr)
32
33/*
34 * We do the locked ops that don't return the old value as
35 * a mask operation on a byte.
36 */
37#define IS_IMMEDIATE(nr) (__builtin_constant_p(nr))
38#define CONST_MASK_ADDR(nr, addr) BITOP_ADDR((void *)(addr) + ((nr)>>3))
39#define CONST_MASK(nr) (1 << ((nr) & 7))
40
31/** 41/**
32 * set_bit - Atomically set a bit in memory 42 * set_bit - Atomically set a bit in memory
33 * @nr: the bit to set 43 * @nr: the bit to set
@@ -43,9 +53,17 @@
43 * Note that @nr may be almost arbitrarily large; this function is not 53 * Note that @nr may be almost arbitrarily large; this function is not
44 * restricted to acting on a single-word quantity. 54 * restricted to acting on a single-word quantity.
45 */ 55 */
46static inline void set_bit(int nr, volatile void *addr) 56static inline void set_bit(unsigned int nr, volatile unsigned long *addr)
47{ 57{
48 asm volatile(LOCK_PREFIX "bts %1,%0" : ADDR : "Ir" (nr) : "memory"); 58 if (IS_IMMEDIATE(nr)) {
59 asm volatile(LOCK_PREFIX "orb %1,%0"
60 : CONST_MASK_ADDR(nr, addr)
61 : "iq" ((u8)CONST_MASK(nr))
62 : "memory");
63 } else {
64 asm volatile(LOCK_PREFIX "bts %1,%0"
65 : BITOP_ADDR(addr) : "Ir" (nr) : "memory");
66 }
49} 67}
50 68
51/** 69/**
@@ -57,7 +75,7 @@ static inline void set_bit(int nr, volatile void *addr)
57 * If it's called on the same region of memory simultaneously, the effect 75 * If it's called on the same region of memory simultaneously, the effect
58 * may be that only one operation succeeds. 76 * may be that only one operation succeeds.
59 */ 77 */
60static inline void __set_bit(int nr, volatile void *addr) 78static inline void __set_bit(int nr, volatile unsigned long *addr)
61{ 79{
62 asm volatile("bts %1,%0" : ADDR : "Ir" (nr) : "memory"); 80 asm volatile("bts %1,%0" : ADDR : "Ir" (nr) : "memory");
63} 81}
@@ -72,9 +90,17 @@ static inline void __set_bit(int nr, volatile void *addr)
72 * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit() 90 * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit()
73 * in order to ensure changes are visible on other processors. 91 * in order to ensure changes are visible on other processors.
74 */ 92 */
75static inline void clear_bit(int nr, volatile void *addr) 93static inline void clear_bit(int nr, volatile unsigned long *addr)
76{ 94{
77 asm volatile(LOCK_PREFIX "btr %1,%0" : ADDR : "Ir" (nr)); 95 if (IS_IMMEDIATE(nr)) {
96 asm volatile(LOCK_PREFIX "andb %1,%0"
97 : CONST_MASK_ADDR(nr, addr)
98 : "iq" ((u8)~CONST_MASK(nr)));
99 } else {
100 asm volatile(LOCK_PREFIX "btr %1,%0"
101 : BITOP_ADDR(addr)
102 : "Ir" (nr));
103 }
78} 104}
79 105
80/* 106/*
@@ -85,13 +111,13 @@ static inline void clear_bit(int nr, volatile void *addr)
85 * clear_bit() is atomic and implies release semantics before the memory 111 * clear_bit() is atomic and implies release semantics before the memory
86 * operation. It can be used for an unlock. 112 * operation. It can be used for an unlock.
87 */ 113 */
88static inline void clear_bit_unlock(unsigned nr, volatile void *addr) 114static inline void clear_bit_unlock(unsigned nr, volatile unsigned long *addr)
89{ 115{
90 barrier(); 116 barrier();
91 clear_bit(nr, addr); 117 clear_bit(nr, addr);
92} 118}
93 119
94static inline void __clear_bit(int nr, volatile void *addr) 120static inline void __clear_bit(int nr, volatile unsigned long *addr)
95{ 121{
96 asm volatile("btr %1,%0" : ADDR : "Ir" (nr)); 122 asm volatile("btr %1,%0" : ADDR : "Ir" (nr));
97} 123}
@@ -108,7 +134,7 @@ static inline void __clear_bit(int nr, volatile void *addr)
108 * No memory barrier is required here, because x86 cannot reorder stores past 134 * No memory barrier is required here, because x86 cannot reorder stores past
109 * older loads. Same principle as spin_unlock. 135 * older loads. Same principle as spin_unlock.
110 */ 136 */
111static inline void __clear_bit_unlock(unsigned nr, volatile void *addr) 137static inline void __clear_bit_unlock(unsigned nr, volatile unsigned long *addr)
112{ 138{
113 barrier(); 139 barrier();
114 __clear_bit(nr, addr); 140 __clear_bit(nr, addr);
@@ -126,7 +152,7 @@ static inline void __clear_bit_unlock(unsigned nr, volatile void *addr)
126 * If it's called on the same region of memory simultaneously, the effect 152 * If it's called on the same region of memory simultaneously, the effect
127 * may be that only one operation succeeds. 153 * may be that only one operation succeeds.
128 */ 154 */
129static inline void __change_bit(int nr, volatile void *addr) 155static inline void __change_bit(int nr, volatile unsigned long *addr)
130{ 156{
131 asm volatile("btc %1,%0" : ADDR : "Ir" (nr)); 157 asm volatile("btc %1,%0" : ADDR : "Ir" (nr));
132} 158}
@@ -140,7 +166,7 @@ static inline void __change_bit(int nr, volatile void *addr)
140 * Note that @nr may be almost arbitrarily large; this function is not 166 * Note that @nr may be almost arbitrarily large; this function is not
141 * restricted to acting on a single-word quantity. 167 * restricted to acting on a single-word quantity.
142 */ 168 */
143static inline void change_bit(int nr, volatile void *addr) 169static inline void change_bit(int nr, volatile unsigned long *addr)
144{ 170{
145 asm volatile(LOCK_PREFIX "btc %1,%0" : ADDR : "Ir" (nr)); 171 asm volatile(LOCK_PREFIX "btc %1,%0" : ADDR : "Ir" (nr));
146} 172}
@@ -153,7 +179,7 @@ static inline void change_bit(int nr, volatile void *addr)
153 * This operation is atomic and cannot be reordered. 179 * This operation is atomic and cannot be reordered.
154 * It also implies a memory barrier. 180 * It also implies a memory barrier.
155 */ 181 */
156static inline int test_and_set_bit(int nr, volatile void *addr) 182static inline int test_and_set_bit(int nr, volatile unsigned long *addr)
157{ 183{
158 int oldbit; 184 int oldbit;
159 185
@@ -170,7 +196,7 @@ static inline int test_and_set_bit(int nr, volatile void *addr)
170 * 196 *
171 * This is the same as test_and_set_bit on x86. 197 * This is the same as test_and_set_bit on x86.
172 */ 198 */
173static inline int test_and_set_bit_lock(int nr, volatile void *addr) 199static inline int test_and_set_bit_lock(int nr, volatile unsigned long *addr)
174{ 200{
175 return test_and_set_bit(nr, addr); 201 return test_and_set_bit(nr, addr);
176} 202}
@@ -184,7 +210,7 @@ static inline int test_and_set_bit_lock(int nr, volatile void *addr)
184 * If two examples of this operation race, one can appear to succeed 210 * If two examples of this operation race, one can appear to succeed
185 * but actually fail. You must protect multiple accesses with a lock. 211 * but actually fail. You must protect multiple accesses with a lock.
186 */ 212 */
187static inline int __test_and_set_bit(int nr, volatile void *addr) 213static inline int __test_and_set_bit(int nr, volatile unsigned long *addr)
188{ 214{
189 int oldbit; 215 int oldbit;
190 216
@@ -203,7 +229,7 @@ static inline int __test_and_set_bit(int nr, volatile void *addr)
203 * This operation is atomic and cannot be reordered. 229 * This operation is atomic and cannot be reordered.
204 * It also implies a memory barrier. 230 * It also implies a memory barrier.
205 */ 231 */
206static inline int test_and_clear_bit(int nr, volatile void *addr) 232static inline int test_and_clear_bit(int nr, volatile unsigned long *addr)
207{ 233{
208 int oldbit; 234 int oldbit;
209 235
@@ -223,7 +249,7 @@ static inline int test_and_clear_bit(int nr, volatile void *addr)
223 * If two examples of this operation race, one can appear to succeed 249 * If two examples of this operation race, one can appear to succeed
224 * but actually fail. You must protect multiple accesses with a lock. 250 * but actually fail. You must protect multiple accesses with a lock.
225 */ 251 */
226static inline int __test_and_clear_bit(int nr, volatile void *addr) 252static inline int __test_and_clear_bit(int nr, volatile unsigned long *addr)
227{ 253{
228 int oldbit; 254 int oldbit;
229 255
@@ -235,7 +261,7 @@ static inline int __test_and_clear_bit(int nr, volatile void *addr)
235} 261}
236 262
237/* WARNING: non atomic and it can be reordered! */ 263/* WARNING: non atomic and it can be reordered! */
238static inline int __test_and_change_bit(int nr, volatile void *addr) 264static inline int __test_and_change_bit(int nr, volatile unsigned long *addr)
239{ 265{
240 int oldbit; 266 int oldbit;
241 267
@@ -255,7 +281,7 @@ static inline int __test_and_change_bit(int nr, volatile void *addr)
255 * This operation is atomic and cannot be reordered. 281 * This operation is atomic and cannot be reordered.
256 * It also implies a memory barrier. 282 * It also implies a memory barrier.
257 */ 283 */
258static inline int test_and_change_bit(int nr, volatile void *addr) 284static inline int test_and_change_bit(int nr, volatile unsigned long *addr)
259{ 285{
260 int oldbit; 286 int oldbit;
261 287
@@ -266,13 +292,13 @@ static inline int test_and_change_bit(int nr, volatile void *addr)
266 return oldbit; 292 return oldbit;
267} 293}
268 294
269static inline int constant_test_bit(int nr, const volatile void *addr) 295static inline int constant_test_bit(int nr, const volatile unsigned long *addr)
270{ 296{
271 return ((1UL << (nr % BITS_PER_LONG)) & 297 return ((1UL << (nr % BITS_PER_LONG)) &
272 (((unsigned long *)addr)[nr / BITS_PER_LONG])) != 0; 298 (((unsigned long *)addr)[nr / BITS_PER_LONG])) != 0;
273} 299}
274 300
275static inline int variable_test_bit(int nr, volatile const void *addr) 301static inline int variable_test_bit(int nr, volatile const unsigned long *addr)
276{ 302{
277 int oldbit; 303 int oldbit;
278 304
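constant_test_bit() is written in plain C so the compiler can fold it away when nr is a compile-time constant, while variable_test_bit() (cut off above) uses the bt instruction. The selector macro itself is outside this hunk, but the usual pairing looks like this (a sketch of the standard dispatch, not quoted from the patch):

	#define test_bit(nr, addr)                      \
		(__builtin_constant_p((nr))             \
		 ? constant_test_bit((nr), (addr))      \
		 : variable_test_bit((nr), (addr)))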
diff --git a/include/asm-x86/bootparam.h b/include/asm-x86/bootparam.h
index f62f4733606b..ae22bdf0ab14 100644
--- a/include/asm-x86/bootparam.h
+++ b/include/asm-x86/bootparam.h
@@ -11,6 +11,7 @@
11 11
12/* setup data types */ 12/* setup data types */
13#define SETUP_NONE 0 13#define SETUP_NONE 0
14#define SETUP_E820_EXT 1
14 15
15/* extensible setup data list node */ 16/* extensible setup data list node */
16struct setup_data { 17struct setup_data {
@@ -40,6 +41,7 @@ struct setup_header {
40 __u8 type_of_loader; 41 __u8 type_of_loader;
41 __u8 loadflags; 42 __u8 loadflags;
42#define LOADED_HIGH (1<<0) 43#define LOADED_HIGH (1<<0)
44#define QUIET_FLAG (1<<5)
43#define KEEP_SEGMENTS (1<<6) 45#define KEEP_SEGMENTS (1<<6)
44#define CAN_USE_HEAP (1<<7) 46#define CAN_USE_HEAP (1<<7)
45 __u16 setup_move_size; 47 __u16 setup_move_size;
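QUIET_FLAG (bit 5 of loadflags) gives boot loaders a way to ask for less early output. A hedged sketch of how a consumer might test it; the boot_params accessor is assumed from this header family, and the quiet variable is purely illustrative:

	if (boot_params.hdr.loadflags & QUIET_FLAG)
		quiet = 1;      /* skip non-error boot messages */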
diff --git a/include/asm-x86/cmpxchg_64.h b/include/asm-x86/cmpxchg_64.h
index d9b26b9a28cf..17463ccf8166 100644
--- a/include/asm-x86/cmpxchg_64.h
+++ b/include/asm-x86/cmpxchg_64.h
@@ -93,6 +93,39 @@ static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
93 return old; 93 return old;
94} 94}
95 95
96/*
97 * Always use locked operations when touching memory shared with a
98 * hypervisor, since the system may be SMP even if the guest kernel
99 * isn't.
100 */
101static inline unsigned long __sync_cmpxchg(volatile void *ptr,
102 unsigned long old,
103 unsigned long new, int size)
104{
105 unsigned long prev;
106 switch (size) {
107 case 1:
108 asm volatile("lock; cmpxchgb %b1,%2"
109 : "=a"(prev)
110 : "q"(new), "m"(*__xg(ptr)), "0"(old)
111 : "memory");
112 return prev;
113 case 2:
114 asm volatile("lock; cmpxchgw %w1,%2"
115 : "=a"(prev)
116 : "r"(new), "m"(*__xg(ptr)), "0"(old)
117 : "memory");
118 return prev;
119 case 4:
120 asm volatile("lock; cmpxchgl %1,%2"
121 : "=a"(prev)
122 : "r"(new), "m"(*__xg(ptr)), "0"(old)
123 : "memory");
124 return prev;
125 }
126 return old;
127}
128
96static inline unsigned long __cmpxchg_local(volatile void *ptr, 129static inline unsigned long __cmpxchg_local(volatile void *ptr,
97 unsigned long old, 130 unsigned long old,
98 unsigned long new, int size) 131 unsigned long new, int size)
@@ -139,6 +172,10 @@ static inline unsigned long __cmpxchg_local(volatile void *ptr,
139 ((__typeof__(*(ptr)))__cmpxchg_local((ptr), (unsigned long)(o), \ 172 ((__typeof__(*(ptr)))__cmpxchg_local((ptr), (unsigned long)(o), \
140 (unsigned long)(n), \ 173 (unsigned long)(n), \
141 sizeof(*(ptr)))) 174 sizeof(*(ptr))))
175#define sync_cmpxchg(ptr, o, n) \
176 ((__typeof__(*(ptr)))__sync_cmpxchg((ptr), (unsigned long)(o), \
177 (unsigned long)(n), \
178 sizeof(*(ptr))))
142#define cmpxchg64_local(ptr, o, n) \ 179#define cmpxchg64_local(ptr, o, n) \
143({ \ 180({ \
144 BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ 181 BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
diff --git a/include/asm-x86/cpufeature.h b/include/asm-x86/cpufeature.h
index 0d609c837a41..75ef959db329 100644
--- a/include/asm-x86/cpufeature.h
+++ b/include/asm-x86/cpufeature.h
@@ -74,8 +74,8 @@
74#define X86_FEATURE_ARCH_PERFMON (3*32+11) /* Intel Architectural PerfMon */ 74#define X86_FEATURE_ARCH_PERFMON (3*32+11) /* Intel Architectural PerfMon */
75#define X86_FEATURE_PEBS (3*32+12) /* Precise-Event Based Sampling */ 75#define X86_FEATURE_PEBS (3*32+12) /* Precise-Event Based Sampling */
76#define X86_FEATURE_BTS (3*32+13) /* Branch Trace Store */ 76#define X86_FEATURE_BTS (3*32+13) /* Branch Trace Store */
77/* 14 free */ 77#define X86_FEATURE_SYSCALL32 (3*32+14) /* syscall in ia32 userspace */
78/* 15 free */ 78#define X86_FEATURE_SYSENTER32 (3*32+15) /* sysenter in ia32 userspace */
79#define X86_FEATURE_REP_GOOD (3*32+16) /* rep microcode works well on this CPU */ 79#define X86_FEATURE_REP_GOOD (3*32+16) /* rep microcode works well on this CPU */
80#define X86_FEATURE_MFENCE_RDTSC (3*32+17) /* Mfence synchronizes RDTSC */ 80#define X86_FEATURE_MFENCE_RDTSC (3*32+17) /* Mfence synchronizes RDTSC */
81#define X86_FEATURE_LFENCE_RDTSC (3*32+18) /* Lfence synchronizes RDTSC */ 81#define X86_FEATURE_LFENCE_RDTSC (3*32+18) /* Lfence synchronizes RDTSC */
@@ -106,6 +106,7 @@
106/* More extended AMD flags: CPUID level 0x80000001, ecx, word 6 */ 106/* More extended AMD flags: CPUID level 0x80000001, ecx, word 6 */
107#define X86_FEATURE_LAHF_LM (6*32+ 0) /* LAHF/SAHF in long mode */ 107#define X86_FEATURE_LAHF_LM (6*32+ 0) /* LAHF/SAHF in long mode */
108#define X86_FEATURE_CMP_LEGACY (6*32+ 1) /* If yes HyperThreading not valid */ 108#define X86_FEATURE_CMP_LEGACY (6*32+ 1) /* If yes HyperThreading not valid */
109#define X86_FEATURE_IBS (6*32+ 10) /* Instruction Based Sampling */
109 110
110/* 111/*
111 * Auxiliary flags: Linux defined - For features scattered in various 112 * Auxiliary flags: Linux defined - For features scattered in various
@@ -142,11 +143,11 @@ extern const char * const x86_power_flags[32];
142#define clear_cpu_cap(c, bit) clear_bit(bit, (unsigned long *)((c)->x86_capability)) 143#define clear_cpu_cap(c, bit) clear_bit(bit, (unsigned long *)((c)->x86_capability))
143#define setup_clear_cpu_cap(bit) do { \ 144#define setup_clear_cpu_cap(bit) do { \
144 clear_cpu_cap(&boot_cpu_data, bit); \ 145 clear_cpu_cap(&boot_cpu_data, bit); \
145 set_bit(bit, cleared_cpu_caps); \ 146 set_bit(bit, (unsigned long *)cleared_cpu_caps); \
146} while (0) 147} while (0)
147#define setup_force_cpu_cap(bit) do { \ 148#define setup_force_cpu_cap(bit) do { \
148 set_cpu_cap(&boot_cpu_data, bit); \ 149 set_cpu_cap(&boot_cpu_data, bit); \
149 clear_bit(bit, cleared_cpu_caps); \ 150 clear_bit(bit, (unsigned long *)cleared_cpu_caps); \
150} while (0) 151} while (0)
151 152
152#define cpu_has_fpu boot_cpu_has(X86_FEATURE_FPU) 153#define cpu_has_fpu boot_cpu_has(X86_FEATURE_FPU)
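The (unsigned long *) casts added above are forced by the bitops change earlier in this patch: set_bit()/clear_bit() now take volatile unsigned long *, while the capability bitmaps are arrays of 32-bit words. A compressed illustration (assuming the __u32 x86_capability[NCAPINTS] layout):

	__u32 caps[NCAPINTS];
	set_bit(X86_FEATURE_PEBS, caps);                  /* now a pointer-type mismatch */
	set_bit(X86_FEATURE_PEBS, (unsigned long *)caps); /* what the macros do instead */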
diff --git a/include/asm-x86/current.h b/include/asm-x86/current.h
index d2526d3f7346..7515c19d4988 100644
--- a/include/asm-x86/current.h
+++ b/include/asm-x86/current.h
@@ -1,5 +1,39 @@
1#ifndef _X86_CURRENT_H
2#define _X86_CURRENT_H
3
1#ifdef CONFIG_X86_32 4#ifdef CONFIG_X86_32
2# include "current_32.h" 5#include <linux/compiler.h>
3#else 6#include <asm/percpu.h>
4# include "current_64.h" 7
5#endif 8struct task_struct;
9
10DECLARE_PER_CPU(struct task_struct *, current_task);
11static __always_inline struct task_struct *get_current(void)
12{
13 return x86_read_percpu(current_task);
14}
15
16#else /* X86_32 */
17
18#ifndef __ASSEMBLY__
19#include <asm/pda.h>
20
21struct task_struct;
22
23static __always_inline struct task_struct *get_current(void)
24{
25 return read_pda(pcurrent);
26}
27
28#else /* __ASSEMBLY__ */
29
30#include <asm/asm-offsets.h>
31#define GET_CURRENT(reg) movq %gs:(pda_pcurrent),reg
32
33#endif /* __ASSEMBLY__ */
34
35#endif /* X86_32 */
36
37#define current get_current()
38
39#endif /* X86_CURRENT_H */
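Both halves of the unified header end in the same place: current expands to get_current(), via the per-cpu variable on 32-bit and the PDA on 64-bit. Callers never notice the difference; a usage sketch:

	printk(KERN_DEBUG "pid %d (%s)\n", current->pid, current->comm);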
diff --git a/include/asm-x86/current_32.h b/include/asm-x86/current_32.h
deleted file mode 100644
index 5af9bdb97a16..000000000000
--- a/include/asm-x86/current_32.h
+++ /dev/null
@@ -1,17 +0,0 @@
1#ifndef _I386_CURRENT_H
2#define _I386_CURRENT_H
3
4#include <linux/compiler.h>
5#include <asm/percpu.h>
6
7struct task_struct;
8
9DECLARE_PER_CPU(struct task_struct *, current_task);
10static __always_inline struct task_struct *get_current(void)
11{
12 return x86_read_percpu(current_task);
13}
14
15#define current get_current()
16
17#endif /* !(_I386_CURRENT_H) */
diff --git a/include/asm-x86/current_64.h b/include/asm-x86/current_64.h
deleted file mode 100644
index 2d368ede2fc1..000000000000
--- a/include/asm-x86/current_64.h
+++ /dev/null
@@ -1,27 +0,0 @@
1#ifndef _X86_64_CURRENT_H
2#define _X86_64_CURRENT_H
3
4#if !defined(__ASSEMBLY__)
5struct task_struct;
6
7#include <asm/pda.h>
8
9static inline struct task_struct *get_current(void)
10{
11 struct task_struct *t = read_pda(pcurrent);
12 return t;
13}
14
15#define current get_current()
16
17#else
18
19#ifndef ASM_OFFSET_H
20#include <asm/asm-offsets.h>
21#endif
22
23#define GET_CURRENT(reg) movq %gs:(pda_pcurrent),reg
24
25#endif
26
27#endif /* !(_X86_64_CURRENT_H) */
diff --git a/include/asm-x86/desc.h b/include/asm-x86/desc.h
index 268a012bcd79..07f9f2b17be8 100644
--- a/include/asm-x86/desc.h
+++ b/include/asm-x86/desc.h
@@ -29,11 +29,17 @@ static inline void fill_ldt(struct desc_struct *desc,
29extern struct desc_ptr idt_descr; 29extern struct desc_ptr idt_descr;
30extern gate_desc idt_table[]; 30extern gate_desc idt_table[];
31 31
32struct gdt_page {
33 struct desc_struct gdt[GDT_ENTRIES];
34} __attribute__((aligned(PAGE_SIZE)));
35DECLARE_PER_CPU(struct gdt_page, gdt_page);
36
37static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu)
38{
39 return per_cpu(gdt_page, cpu).gdt;
40}
41
32#ifdef CONFIG_X86_64 42#ifdef CONFIG_X86_64
33extern struct desc_struct cpu_gdt_table[GDT_ENTRIES];
34extern struct desc_ptr cpu_gdt_descr[];
35/* the cpu gdt accessor */
36#define get_cpu_gdt_table(x) ((struct desc_struct *)cpu_gdt_descr[x].address)
37 43
38static inline void pack_gate(gate_desc *gate, unsigned type, unsigned long func, 44static inline void pack_gate(gate_desc *gate, unsigned type, unsigned long func,
39 unsigned dpl, unsigned ist, unsigned seg) 45 unsigned dpl, unsigned ist, unsigned seg)
@@ -51,16 +57,6 @@ static inline void pack_gate(gate_desc *gate, unsigned type, unsigned long func,
51} 57}
52 58
53#else 59#else
54struct gdt_page {
55 struct desc_struct gdt[GDT_ENTRIES];
56} __attribute__((aligned(PAGE_SIZE)));
57DECLARE_PER_CPU(struct gdt_page, gdt_page);
58
59static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu)
60{
61 return per_cpu(gdt_page, cpu).gdt;
62}
63
64static inline void pack_gate(gate_desc *gate, unsigned char type, 60static inline void pack_gate(gate_desc *gate, unsigned char type,
65 unsigned long base, unsigned dpl, unsigned flags, 61 unsigned long base, unsigned dpl, unsigned flags,
66 unsigned short seg) 62 unsigned short seg)
@@ -311,6 +307,28 @@ static inline void set_intr_gate(unsigned int n, void *addr)
311 _set_gate(n, GATE_INTERRUPT, addr, 0, 0, __KERNEL_CS); 307 _set_gate(n, GATE_INTERRUPT, addr, 0, 0, __KERNEL_CS);
312} 308}
313 309
310#define SYS_VECTOR_FREE 0
311#define SYS_VECTOR_ALLOCED 1
312
313extern int first_system_vector;
314extern char system_vectors[];
315
316static inline void alloc_system_vector(int vector)
317{
318 if (system_vectors[vector] == SYS_VECTOR_FREE) {
319 system_vectors[vector] = SYS_VECTOR_ALLOCED;
320 if (first_system_vector > vector)
321 first_system_vector = vector;
322 } else
323 BUG();
324}
325
326static inline void alloc_intr_gate(unsigned int n, void *addr)
327{
328 alloc_system_vector(n);
329 set_intr_gate(n, addr);
330}
331
314/* 332/*
315 * This routine sets up an interrupt gate at directory privilege level 3. 333 * This routine sets up an interrupt gate at directory privilege level 3.
316 */ 334 */
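alloc_system_vector() BUG()s on a doubly allocated vector and keeps first_system_vector tracking the lowest vector handed out. A hedged registration sketch (this handler/vector pairing is illustrative, using names visible elsewhere in the tree):

	alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);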
diff --git a/include/asm-x86/desc_defs.h b/include/asm-x86/desc_defs.h
index eccb4ea1f918..f7bacf357dac 100644
--- a/include/asm-x86/desc_defs.h
+++ b/include/asm-x86/desc_defs.h
@@ -75,10 +75,14 @@ struct ldttss_desc64 {
75typedef struct gate_struct64 gate_desc; 75typedef struct gate_struct64 gate_desc;
76typedef struct ldttss_desc64 ldt_desc; 76typedef struct ldttss_desc64 ldt_desc;
77typedef struct ldttss_desc64 tss_desc; 77typedef struct ldttss_desc64 tss_desc;
78#define gate_offset(g) ((g).offset_low | ((unsigned long)(g).offset_middle << 16) | ((unsigned long)(g).offset_high << 32))
79#define gate_segment(g) ((g).segment)
78#else 80#else
79typedef struct desc_struct gate_desc; 81typedef struct desc_struct gate_desc;
80typedef struct desc_struct ldt_desc; 82typedef struct desc_struct ldt_desc;
81typedef struct desc_struct tss_desc; 83typedef struct desc_struct tss_desc;
84#define gate_offset(g) (((g).b & 0xffff0000) | ((g).a & 0x0000ffff))
85#define gate_segment(g) ((g).a >> 16)
82#endif 86#endif
83 87
84struct desc_ptr { 88struct desc_ptr {
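gate_offset() stitches the handler address back together from the split fields of a gate descriptor, on both widths. A sketch of its use (idt_table[] is declared in desc.h; n is an arbitrary vector number):

	unsigned long handler = gate_offset(idt_table[n]);
	unsigned short seg    = gate_segment(idt_table[n]);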
diff --git a/include/asm-x86/dmi.h b/include/asm-x86/dmi.h
index 4edf7514a750..58a86571fe0f 100644
--- a/include/asm-x86/dmi.h
+++ b/include/asm-x86/dmi.h
@@ -3,12 +3,6 @@
3 3
4#include <asm/io.h> 4#include <asm/io.h>
5 5
6#ifdef CONFIG_X86_32
7
8#define dmi_alloc alloc_bootmem
9
10#else /* CONFIG_X86_32 */
11
12#define DMI_MAX_DATA 2048 6#define DMI_MAX_DATA 2048
13 7
14extern int dmi_alloc_index; 8extern int dmi_alloc_index;
@@ -25,8 +19,6 @@ static inline void *dmi_alloc(unsigned len)
25 return dmi_alloc_data + idx; 19 return dmi_alloc_data + idx;
26} 20}
27 21
28#endif
29
30/* Use early IO mappings for DMI because it's initialized early */ 22/* Use early IO mappings for DMI because it's initialized early */
31#define dmi_ioremap early_ioremap 23#define dmi_ioremap early_ioremap
32#define dmi_iounmap early_iounmap 24#define dmi_iounmap early_iounmap
diff --git a/include/asm-x86/dwarf2.h b/include/asm-x86/dwarf2.h
index b3cbb0ccae18..0bfe250894f7 100644
--- a/include/asm-x86/dwarf2.h
+++ b/include/asm-x86/dwarf2.h
@@ -1,5 +1,61 @@
1#ifdef CONFIG_X86_32 1#ifndef _DWARF2_H
2# include "dwarf2_32.h" 2#define _DWARF2_H
3
4#ifndef __ASSEMBLY__
 5#warning "asm/dwarf2.h should only be included in pure assembly files"
6#endif
7
8/*
9 Macros for dwarf2 CFI unwind table entries.
10 See "as.info" for details on these pseudo ops. Unfortunately
11 they are only supported in very new binutils, so define them
12 away for older version.
13 */
14
15#ifdef CONFIG_AS_CFI
16
17#define CFI_STARTPROC .cfi_startproc
18#define CFI_ENDPROC .cfi_endproc
19#define CFI_DEF_CFA .cfi_def_cfa
20#define CFI_DEF_CFA_REGISTER .cfi_def_cfa_register
21#define CFI_DEF_CFA_OFFSET .cfi_def_cfa_offset
22#define CFI_ADJUST_CFA_OFFSET .cfi_adjust_cfa_offset
23#define CFI_OFFSET .cfi_offset
24#define CFI_REL_OFFSET .cfi_rel_offset
25#define CFI_REGISTER .cfi_register
26#define CFI_RESTORE .cfi_restore
27#define CFI_REMEMBER_STATE .cfi_remember_state
28#define CFI_RESTORE_STATE .cfi_restore_state
29#define CFI_UNDEFINED .cfi_undefined
30
31#ifdef CONFIG_AS_CFI_SIGNAL_FRAME
32#define CFI_SIGNAL_FRAME .cfi_signal_frame
33#else
34#define CFI_SIGNAL_FRAME
35#endif
36
3#else 37#else
4# include "dwarf2_64.h" 38
39/* Due to the structure of pre-existing code, don't use assembler line
40 comment character # to ignore the arguments. Instead, use a dummy macro. */
41.macro ignore a=0, b=0, c=0, d=0
42.endm
43
44#define CFI_STARTPROC ignore
45#define CFI_ENDPROC ignore
46#define CFI_DEF_CFA ignore
47#define CFI_DEF_CFA_REGISTER ignore
48#define CFI_DEF_CFA_OFFSET ignore
49#define CFI_ADJUST_CFA_OFFSET ignore
50#define CFI_OFFSET ignore
51#define CFI_REL_OFFSET ignore
52#define CFI_REGISTER ignore
53#define CFI_RESTORE ignore
54#define CFI_REMEMBER_STATE ignore
55#define CFI_RESTORE_STATE ignore
56#define CFI_UNDEFINED ignore
57#define CFI_SIGNAL_FRAME ignore
58
59#endif
60
5#endif 61#endif
diff --git a/include/asm-x86/dwarf2_32.h b/include/asm-x86/dwarf2_32.h
deleted file mode 100644
index 6d66398a307d..000000000000
--- a/include/asm-x86/dwarf2_32.h
+++ /dev/null
@@ -1,61 +0,0 @@
1#ifndef _DWARF2_H
2#define _DWARF2_H
3
4#ifndef __ASSEMBLY__
5#warning "asm/dwarf2.h should be only included in pure assembly files"
6#endif
7
8/*
9 Macros for dwarf2 CFI unwind table entries.
10 See "as.info" for details on these pseudo ops. Unfortunately
11 they are only supported in very new binutils, so define them
12 away for older versions.
13 */
14
15#ifdef CONFIG_UNWIND_INFO
16
17#define CFI_STARTPROC .cfi_startproc
18#define CFI_ENDPROC .cfi_endproc
19#define CFI_DEF_CFA .cfi_def_cfa
20#define CFI_DEF_CFA_REGISTER .cfi_def_cfa_register
21#define CFI_DEF_CFA_OFFSET .cfi_def_cfa_offset
22#define CFI_ADJUST_CFA_OFFSET .cfi_adjust_cfa_offset
23#define CFI_OFFSET .cfi_offset
24#define CFI_REL_OFFSET .cfi_rel_offset
25#define CFI_REGISTER .cfi_register
26#define CFI_RESTORE .cfi_restore
27#define CFI_REMEMBER_STATE .cfi_remember_state
28#define CFI_RESTORE_STATE .cfi_restore_state
29#define CFI_UNDEFINED .cfi_undefined
30
31#ifdef CONFIG_AS_CFI_SIGNAL_FRAME
32#define CFI_SIGNAL_FRAME .cfi_signal_frame
33#else
34#define CFI_SIGNAL_FRAME
35#endif
36
37#else
38
39/* Due to the structure of pre-existing code, don't use assembler line
40 comment character # to ignore the arguments. Instead, use a dummy macro. */
41.macro ignore a=0, b=0, c=0, d=0
42.endm
43
44#define CFI_STARTPROC ignore
45#define CFI_ENDPROC ignore
46#define CFI_DEF_CFA ignore
47#define CFI_DEF_CFA_REGISTER ignore
48#define CFI_DEF_CFA_OFFSET ignore
49#define CFI_ADJUST_CFA_OFFSET ignore
50#define CFI_OFFSET ignore
51#define CFI_REL_OFFSET ignore
52#define CFI_REGISTER ignore
53#define CFI_RESTORE ignore
54#define CFI_REMEMBER_STATE ignore
55#define CFI_RESTORE_STATE ignore
56#define CFI_UNDEFINED ignore
57#define CFI_SIGNAL_FRAME ignore
58
59#endif
60
61#endif
diff --git a/include/asm-x86/dwarf2_64.h b/include/asm-x86/dwarf2_64.h
deleted file mode 100644
index c950519a264d..000000000000
--- a/include/asm-x86/dwarf2_64.h
+++ /dev/null
@@ -1,56 +0,0 @@
1#ifndef _DWARF2_H
2#define _DWARF2_H 1
3
4#ifndef __ASSEMBLY__
5#warning "asm/dwarf2.h should be only included in pure assembly files"
6#endif
7
8/*
9 Macros for dwarf2 CFI unwind table entries.
10 See "as.info" for details on these pseudo ops. Unfortunately
11 they are only supported in very new binutils, so define them
12 away for older versions.
13 */
14
15#ifdef CONFIG_AS_CFI
16
17#define CFI_STARTPROC .cfi_startproc
18#define CFI_ENDPROC .cfi_endproc
19#define CFI_DEF_CFA .cfi_def_cfa
20#define CFI_DEF_CFA_REGISTER .cfi_def_cfa_register
21#define CFI_DEF_CFA_OFFSET .cfi_def_cfa_offset
22#define CFI_ADJUST_CFA_OFFSET .cfi_adjust_cfa_offset
23#define CFI_OFFSET .cfi_offset
24#define CFI_REL_OFFSET .cfi_rel_offset
25#define CFI_REGISTER .cfi_register
26#define CFI_RESTORE .cfi_restore
27#define CFI_REMEMBER_STATE .cfi_remember_state
28#define CFI_RESTORE_STATE .cfi_restore_state
29#define CFI_UNDEFINED .cfi_undefined
30#ifdef CONFIG_AS_CFI_SIGNAL_FRAME
31#define CFI_SIGNAL_FRAME .cfi_signal_frame
32#else
33#define CFI_SIGNAL_FRAME
34#endif
35
36#else
37
38/* use assembler line comment character # to ignore the arguments. */
39#define CFI_STARTPROC #
40#define CFI_ENDPROC #
41#define CFI_DEF_CFA #
42#define CFI_DEF_CFA_REGISTER #
43#define CFI_DEF_CFA_OFFSET #
44#define CFI_ADJUST_CFA_OFFSET #
45#define CFI_OFFSET #
46#define CFI_REL_OFFSET #
47#define CFI_REGISTER #
48#define CFI_RESTORE #
49#define CFI_REMEMBER_STATE #
50#define CFI_RESTORE_STATE #
51#define CFI_UNDEFINED #
52#define CFI_SIGNAL_FRAME #
53
54#endif
55
56#endif
diff --git a/include/asm-x86/e820.h b/include/asm-x86/e820.h
index 7004251fc66b..33e793e991d0 100644
--- a/include/asm-x86/e820.h
+++ b/include/asm-x86/e820.h
@@ -2,6 +2,41 @@
2#define __ASM_E820_H 2#define __ASM_E820_H
3#define E820MAP 0x2d0 /* our map */ 3#define E820MAP 0x2d0 /* our map */
4#define E820MAX 128 /* number of entries in E820MAP */ 4#define E820MAX 128 /* number of entries in E820MAP */
5
6/*
7 * Legacy E820 BIOS limits us to 128 (E820MAX) nodes due to the
8 * constrained space in the zeropage. If we have more nodes than
9 * that, and if we've booted off EFI firmware, then the EFI tables
10 * passed us from the EFI firmware can list more nodes. Size our
11 * internal memory map tables to have room for these additional
12 * nodes, based on up to three entries per node for which the
13 * kernel was built: MAX_NUMNODES == (1 << CONFIG_NODES_SHIFT),
14 * plus E820MAX, allowing space for the possible duplicate E820
15 * entries that might need room in the same arrays, prior to the
16 * call to sanitize_e820_map() to remove duplicates. The allowance
17 * of three memory map entries per node is "enough" entries for
18 * the initial hardware platform motivating this mechanism to make
19 * use of additional EFI map entries. Future platforms may want
20 * to allow more than three entries per node or otherwise refine
21 * this size.
22 */
23
24/*
25 * Odd: 'make headers_check' complains about numa.h if I try
26 * to collapse the next two #ifdef lines to a single line:
27 * #if defined(__KERNEL__) && defined(CONFIG_EFI)
28 */
29#ifdef __KERNEL__
30#ifdef CONFIG_EFI
31#include <linux/numa.h>
32#define E820_X_MAX (E820MAX + 3 * MAX_NUMNODES)
33#else /* ! CONFIG_EFI */
34#define E820_X_MAX E820MAX
35#endif
36#else /* ! __KERNEL__ */
37#define E820_X_MAX E820MAX
38#endif
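The sizing comment above is easy to check numerically. A worked example, assuming CONFIG_NODES_SHIFT=6:

	/*
	 *   MAX_NUMNODES = 1 << 6       =  64
	 *   E820_X_MAX   = 128 + 3 * 64 = 320 map entries
	 */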
39
5#define E820NR 0x1e8 /* # entries in E820MAP */ 40#define E820NR 0x1e8 /* # entries in E820MAP */
6 41
7#define E820_RAM 1 42#define E820_RAM 1
@@ -9,6 +44,9 @@
9#define E820_ACPI 3 44#define E820_ACPI 3
10#define E820_NVS 4 45#define E820_NVS 4
11 46
47/* reserved RAM used by kernel itself */
48#define E820_RESERVED_KERN 128
49
12#ifndef __ASSEMBLY__ 50#ifndef __ASSEMBLY__
13struct e820entry { 51struct e820entry {
14 __u64 addr; /* start of memory segment */ 52 __u64 addr; /* start of memory segment */
@@ -18,22 +56,79 @@ struct e820entry {
18 56
19struct e820map { 57struct e820map {
20 __u32 nr_map; 58 __u32 nr_map;
21 struct e820entry map[E820MAX]; 59 struct e820entry map[E820_X_MAX];
22}; 60};
61
62/* see comment in arch/x86/kernel/e820.c */
63extern struct e820map e820;
64extern struct e820map e820_saved;
65
66extern int e820_any_mapped(u64 start, u64 end, unsigned type);
67extern int e820_all_mapped(u64 start, u64 end, unsigned type);
68extern void e820_add_region(u64 start, u64 size, int type);
69extern void e820_print_map(char *who);
70extern int
71sanitize_e820_map(struct e820entry *biosmap, int max_nr_map, int *pnr_map);
72extern u64 e820_update_range(u64 start, u64 size, unsigned old_type,
73 unsigned new_type);
74extern u64 e820_remove_range(u64 start, u64 size, unsigned old_type,
75 int checktype);
76extern void update_e820(void);
77extern void e820_setup_gap(void);
78extern int e820_search_gap(unsigned long *gapstart, unsigned long *gapsize,
79 unsigned long start_addr, unsigned long long end_addr);
80struct setup_data;
81extern void parse_e820_ext(struct setup_data *data, unsigned long pa_data);
82
83#if defined(CONFIG_X86_64) || \
84 (defined(CONFIG_X86_32) && defined(CONFIG_HIBERNATION))
85extern void e820_mark_nosave_regions(unsigned long limit_pfn);
86#else
87static inline void e820_mark_nosave_regions(unsigned long limit_pfn)
88{
89}
90#endif
91
92extern unsigned long end_user_pfn;
93
94extern u64 find_e820_area(u64 start, u64 end, u64 size, u64 align);
95extern u64 find_e820_area_size(u64 start, u64 *sizep, u64 align);
96extern void reserve_early(u64 start, u64 end, char *name);
97extern void reserve_early_overlap_ok(u64 start, u64 end, char *name);
98extern void free_early(u64 start, u64 end);
99extern void early_res_to_bootmem(u64 start, u64 end);
100extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align);
101
102extern unsigned long e820_end_of_ram_pfn(void);
103extern unsigned long e820_end_of_low_ram_pfn(void);
104extern int e820_find_active_region(const struct e820entry *ei,
105 unsigned long start_pfn,
106 unsigned long last_pfn,
107 unsigned long *ei_startpfn,
108 unsigned long *ei_endpfn);
109extern void e820_register_active_regions(int nid, unsigned long start_pfn,
110 unsigned long end_pfn);
111extern u64 e820_hole_size(u64 start, u64 end);
112extern void finish_e820_parsing(void);
113extern void e820_reserve_resources(void);
114extern void setup_memory_map(void);
115extern char *default_machine_specific_memory_setup(void);
116extern char *machine_specific_memory_setup(void);
117extern char *memory_setup(void);
118
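A hedged sketch of the early-reservation pair declared above, grabbing 16 KB of usable RAM below 4 GB; this assumes find_e820_area() signals failure with -1ULL:

	u64 addr = find_e820_area(0x100000, 0xffffffffULL, 16384, PAGE_SIZE);
	if (addr != -1ULL)
		reserve_early(addr, addr + 16384, "example");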
23#endif /* __ASSEMBLY__ */ 119#endif /* __ASSEMBLY__ */
24 120
25#define ISA_START_ADDRESS 0xa0000 121#define ISA_START_ADDRESS 0xa0000
26#define ISA_END_ADDRESS 0x100000 122#define ISA_END_ADDRESS 0x100000
123#define is_ISA_range(s, e) ((s) >= ISA_START_ADDRESS && (e) < ISA_END_ADDRESS)
27 124
28#define BIOS_BEGIN 0x000a0000 125#define BIOS_BEGIN 0x000a0000
29#define BIOS_END 0x00100000 126#define BIOS_END 0x00100000
30 127
31#ifdef __KERNEL__ 128#ifdef __KERNEL__
32#ifdef CONFIG_X86_32 129#include <linux/ioport.h>
33# include "e820_32.h" 130
34#else 131#define HIGH_MEMORY (1024*1024)
35# include "e820_64.h"
36#endif
37#endif /* __KERNEL__ */ 132#endif /* __KERNEL__ */
38 133
39#endif /* __ASM_E820_H */ 134#endif /* __ASM_E820_H */
diff --git a/include/asm-x86/e820_32.h b/include/asm-x86/e820_32.h
deleted file mode 100644
index a9f7c6ec32bf..000000000000
--- a/include/asm-x86/e820_32.h
+++ /dev/null
@@ -1,50 +0,0 @@
1/*
2 * structures and definitions for the int 15, ax=e820 memory map
3 * scheme.
4 *
5 * In a nutshell, arch/i386/boot/setup.S populates a scratch table
6 * in the empty_zero_block that contains a list of usable address/size
7 * duples. In arch/i386/kernel/setup.c, this information is
8 * transferred into the e820map, and in arch/i386/mm/init.c, that
9 * new information is used to mark pages reserved or not.
10 *
11 */
12#ifndef __E820_HEADER
13#define __E820_HEADER
14
15#include <linux/ioport.h>
16
17#define HIGH_MEMORY (1024*1024)
18
19#ifndef __ASSEMBLY__
20
21extern struct e820map e820;
22extern void update_e820(void);
23
24extern int e820_all_mapped(unsigned long start, unsigned long end,
25 unsigned type);
26extern int e820_any_mapped(u64 start, u64 end, unsigned type);
27extern void propagate_e820_map(void);
28extern void register_bootmem_low_pages(unsigned long max_low_pfn);
29extern void add_memory_region(unsigned long long start,
30 unsigned long long size, int type);
31extern void update_memory_range(u64 start, u64 size, unsigned old_type,
32 unsigned new_type);
33extern void e820_register_memory(void);
34extern void limit_regions(unsigned long long size);
35extern void print_memory_map(char *who);
36extern void init_iomem_resources(struct resource *code_resource,
37 struct resource *data_resource,
38 struct resource *bss_resource);
39
40#if defined(CONFIG_PM) && defined(CONFIG_HIBERNATION)
41extern void e820_mark_nosave_regions(void);
42#else
43static inline void e820_mark_nosave_regions(void)
44{
45}
46#endif
47
48
49#endif/*!__ASSEMBLY__*/
50#endif/*__E820_HEADER*/
diff --git a/include/asm-x86/e820_64.h b/include/asm-x86/e820_64.h
deleted file mode 100644
index 71c4d685d30d..000000000000
--- a/include/asm-x86/e820_64.h
+++ /dev/null
@@ -1,56 +0,0 @@
1/*
2 * structures and definitions for the int 15, ax=e820 memory map
3 * scheme.
4 *
5 * In a nutshell, setup.S populates a scratch table in the
6 * empty_zero_block that contains a list of usable address/size
7 * duples. setup.c, this information is transferred into the e820map,
8 * and in init.c/numa.c, that new information is used to mark pages
9 * reserved or not.
10 */
11#ifndef __E820_HEADER
12#define __E820_HEADER
13
14#include <linux/ioport.h>
15
16#ifndef __ASSEMBLY__
17extern unsigned long find_e820_area(unsigned long start, unsigned long end,
18 unsigned long size, unsigned long align);
19extern unsigned long find_e820_area_size(unsigned long start,
20 unsigned long *sizep,
21 unsigned long align);
22extern void add_memory_region(unsigned long start, unsigned long size,
23 int type);
24extern void update_memory_range(u64 start, u64 size, unsigned old_type,
25 unsigned new_type);
26extern void setup_memory_region(void);
27extern void contig_e820_setup(void);
28extern unsigned long e820_end_of_ram(void);
29extern void e820_reserve_resources(void);
30extern void e820_mark_nosave_regions(void);
31extern int e820_any_mapped(unsigned long start, unsigned long end,
32 unsigned type);
33extern int e820_all_mapped(unsigned long start, unsigned long end,
34 unsigned type);
35extern int e820_any_non_reserved(unsigned long start, unsigned long end);
36extern int is_memory_any_valid(unsigned long start, unsigned long end);
37extern int e820_all_non_reserved(unsigned long start, unsigned long end);
38extern int is_memory_all_valid(unsigned long start, unsigned long end);
39extern unsigned long e820_hole_size(unsigned long start, unsigned long end);
40
41extern void e820_setup_gap(void);
42extern void e820_register_active_regions(int nid, unsigned long start_pfn,
43 unsigned long end_pfn);
44
45extern void finish_e820_parsing(void);
46
47extern struct e820map e820;
48extern void update_e820(void);
49
50extern void reserve_early(unsigned long start, unsigned long end, char *name);
51extern void free_early(unsigned long start, unsigned long end);
52extern void early_res_to_bootmem(unsigned long start, unsigned long end);
53
54#endif/*!__ASSEMBLY__*/
55
56#endif/*__E820_HEADER*/
diff --git a/include/asm-x86/efi.h b/include/asm-x86/efi.h
index d53004b855cc..7ed2bd7a7f51 100644
--- a/include/asm-x86/efi.h
+++ b/include/asm-x86/efi.h
@@ -90,7 +90,7 @@ extern void *efi_ioremap(unsigned long addr, unsigned long size);
90 90
91#endif /* CONFIG_X86_32 */ 91#endif /* CONFIG_X86_32 */
92 92
93extern void efi_reserve_bootmem(void); 93extern void efi_reserve_early(void);
94extern void efi_call_phys_prelog(void); 94extern void efi_call_phys_prelog(void);
95extern void efi_call_phys_epilog(void); 95extern void efi_call_phys_epilog(void);
96 96
diff --git a/include/asm-x86/elf.h b/include/asm-x86/elf.h
index 8f232dc5b5fe..7be4733c793e 100644
--- a/include/asm-x86/elf.h
+++ b/include/asm-x86/elf.h
@@ -83,9 +83,9 @@ extern unsigned int vdso_enabled;
83 (((x)->e_machine == EM_386) || ((x)->e_machine == EM_486)) 83 (((x)->e_machine == EM_386) || ((x)->e_machine == EM_486))
84 84
85#include <asm/processor.h> 85#include <asm/processor.h>
86#include <asm/system.h>
86 87
87#ifdef CONFIG_X86_32 88#ifdef CONFIG_X86_32
88#include <asm/system.h> /* for savesegment */
89#include <asm/desc.h> 89#include <asm/desc.h>
90 90
91#define elf_check_arch(x) elf_check_arch_ia32(x) 91#define elf_check_arch(x) elf_check_arch_ia32(x)
diff --git a/include/asm-x86/fixmap.h b/include/asm-x86/fixmap.h
index 5bd206973dca..44d4f8217349 100644
--- a/include/asm-x86/fixmap.h
+++ b/include/asm-x86/fixmap.h
@@ -7,7 +7,62 @@
7# include "fixmap_64.h" 7# include "fixmap_64.h"
8#endif 8#endif
9 9
10extern int fixmaps_set;
11
12void __native_set_fixmap(enum fixed_addresses idx, pte_t pte);
13void native_set_fixmap(enum fixed_addresses idx,
14 unsigned long phys, pgprot_t flags);
15
16#ifndef CONFIG_PARAVIRT
17static inline void __set_fixmap(enum fixed_addresses idx,
18 unsigned long phys, pgprot_t flags)
19{
20 native_set_fixmap(idx, phys, flags);
21}
22#endif
23
24#define set_fixmap(idx, phys) \
25 __set_fixmap(idx, phys, PAGE_KERNEL)
26
27/*
28 * Some hardware wants to get fixmapped without caching.
29 */
30#define set_fixmap_nocache(idx, phys) \
31 __set_fixmap(idx, phys, PAGE_KERNEL_NOCACHE)
32
10#define clear_fixmap(idx) \ 33#define clear_fixmap(idx) \
11 __set_fixmap(idx, 0, __pgprot(0)) 34 __set_fixmap(idx, 0, __pgprot(0))
12 35
36#define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT))
37#define __virt_to_fix(x) ((FIXADDR_TOP - ((x)&PAGE_MASK)) >> PAGE_SHIFT)
38
39extern void __this_fixmap_does_not_exist(void);
40
41/*
42 * 'index to address' translation. If anyone tries to use the idx
43 * directly without translation, we catch the bug with a NULL-dereference
44 * kernel oops. Illegal ranges of incoming indices are caught too.
45 */
46static __always_inline unsigned long fix_to_virt(const unsigned int idx)
47{
48 /*
49 * this branch gets completely eliminated after inlining,
50 * except when someone tries to use fixaddr indices in an
51 * illegal way. (such as mixing up address types or using
52 * out-of-range indices).
53 *
54 * If it doesn't get removed, the linker will complain
55 * loudly with a reasonably clear error message.
56 */
57 if (idx >= __end_of_fixed_addresses)
58 __this_fixmap_does_not_exist();
59
60 return __fix_to_virt(idx);
61}
62
63static inline unsigned long virt_to_fix(const unsigned long vaddr)
64{
65 BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START);
66 return __virt_to_fix(vaddr);
67}
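Putting the pieces above together, a typical fixmap consumer establishes the mapping and then translates the index; FIX_APIC_BASE is one such index when local APIC support is configured, and the physical address shown is the usual APIC default, for illustration only:

	set_fixmap_nocache(FIX_APIC_BASE, 0xfee00000);
	unsigned long va = fix_to_virt(FIX_APIC_BASE);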
13#endif 68#endif
diff --git a/include/asm-x86/fixmap_32.h b/include/asm-x86/fixmap_32.h
index 4b96148e90c1..aae2f0501a40 100644
--- a/include/asm-x86/fixmap_32.h
+++ b/include/asm-x86/fixmap_32.h
@@ -79,10 +79,6 @@ enum fixed_addresses {
79 FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */ 79 FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */
80 FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1, 80 FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
81#endif 81#endif
82#ifdef CONFIG_ACPI
83 FIX_ACPI_BEGIN,
84 FIX_ACPI_END = FIX_ACPI_BEGIN + FIX_ACPI_PAGES - 1,
85#endif
86#ifdef CONFIG_PCI_MMCONFIG 82#ifdef CONFIG_PCI_MMCONFIG
87 FIX_PCIE_MCFG, 83 FIX_PCIE_MCFG,
88#endif 84#endif
@@ -103,23 +99,18 @@ enum fixed_addresses {
103 (__end_of_permanent_fixed_addresses & 511), 99 (__end_of_permanent_fixed_addresses & 511),
104 FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS*FIX_BTMAPS_NESTING - 1, 100 FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS*FIX_BTMAPS_NESTING - 1,
105 FIX_WP_TEST, 101 FIX_WP_TEST,
102#ifdef CONFIG_ACPI
103 FIX_ACPI_BEGIN,
104 FIX_ACPI_END = FIX_ACPI_BEGIN + FIX_ACPI_PAGES - 1,
105#endif
106#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT 106#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
107 FIX_OHCI1394_BASE, 107 FIX_OHCI1394_BASE,
108#endif 108#endif
109 __end_of_fixed_addresses 109 __end_of_fixed_addresses
110}; 110};
111 111
112extern void __set_fixmap(enum fixed_addresses idx,
113 unsigned long phys, pgprot_t flags);
114extern void reserve_top_address(unsigned long reserve); 112extern void reserve_top_address(unsigned long reserve);
115 113
116#define set_fixmap(idx, phys) \
117 __set_fixmap(idx, phys, PAGE_KERNEL)
118/*
119 * Some hardware wants to get fixmapped without caching.
120 */
121#define set_fixmap_nocache(idx, phys) \
122 __set_fixmap(idx, phys, PAGE_KERNEL_NOCACHE)
123 114
124#define FIXADDR_TOP ((unsigned long)__FIXADDR_TOP) 115#define FIXADDR_TOP ((unsigned long)__FIXADDR_TOP)
125 116
@@ -128,38 +119,5 @@ extern void reserve_top_address(unsigned long reserve);
128#define FIXADDR_START (FIXADDR_TOP - __FIXADDR_SIZE) 119#define FIXADDR_START (FIXADDR_TOP - __FIXADDR_SIZE)
129#define FIXADDR_BOOT_START (FIXADDR_TOP - __FIXADDR_BOOT_SIZE) 120#define FIXADDR_BOOT_START (FIXADDR_TOP - __FIXADDR_BOOT_SIZE)
130 121
131#define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT))
132#define __virt_to_fix(x) ((FIXADDR_TOP - ((x)&PAGE_MASK)) >> PAGE_SHIFT)
133
134extern void __this_fixmap_does_not_exist(void);
135
136/*
137 * 'index to address' translation. If anyone tries to use the idx
138 * directly without translation, we catch the bug with a NULL-dereference
139 * kernel oops. Illegal ranges of incoming indices are caught too.
140 */
141static __always_inline unsigned long fix_to_virt(const unsigned int idx)
142{
143 /*
144 * this branch gets completely eliminated after inlining,
145 * except when someone tries to use fixaddr indices in an
146 * illegal way. (such as mixing up address types or using
147 * out-of-range indices).
148 *
149 * If it doesn't get removed, the linker will complain
150 * loudly with a reasonably clear error message..
151 */
152 if (idx >= __end_of_fixed_addresses)
153 __this_fixmap_does_not_exist();
154
155 return __fix_to_virt(idx);
156}
157
158static inline unsigned long virt_to_fix(const unsigned long vaddr)
159{
160 BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START);
161 return __virt_to_fix(vaddr);
162}
163
164#endif /* !__ASSEMBLY__ */ 122#endif /* !__ASSEMBLY__ */
165#endif 123#endif
diff --git a/include/asm-x86/fixmap_64.h b/include/asm-x86/fixmap_64.h
index 355d26a75a82..6a4789d57e6c 100644
--- a/include/asm-x86/fixmap_64.h
+++ b/include/asm-x86/fixmap_64.h
@@ -12,6 +12,7 @@
12#define _ASM_FIXMAP_64_H 12#define _ASM_FIXMAP_64_H
13 13
14#include <linux/kernel.h> 14#include <linux/kernel.h>
15#include <asm/acpi.h>
15#include <asm/apicdef.h> 16#include <asm/apicdef.h>
16#include <asm/page.h> 17#include <asm/page.h>
17#include <asm/vsyscall.h> 18#include <asm/vsyscall.h>
@@ -46,23 +47,32 @@ enum fixed_addresses {
46 FIX_EFI_IO_MAP_LAST_PAGE, 47 FIX_EFI_IO_MAP_LAST_PAGE,
47 FIX_EFI_IO_MAP_FIRST_PAGE = FIX_EFI_IO_MAP_LAST_PAGE 48 FIX_EFI_IO_MAP_FIRST_PAGE = FIX_EFI_IO_MAP_LAST_PAGE
48 + MAX_EFI_IO_PAGES - 1, 49 + MAX_EFI_IO_PAGES - 1,
50#ifdef CONFIG_PARAVIRT
51 FIX_PARAVIRT_BOOTMAP,
52#endif
53#ifdef CONFIG_ACPI
54 FIX_ACPI_BEGIN,
55 FIX_ACPI_END = FIX_ACPI_BEGIN + FIX_ACPI_PAGES - 1,
56#endif
49#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT 57#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
50 FIX_OHCI1394_BASE, 58 FIX_OHCI1394_BASE,
51#endif 59#endif
60 __end_of_permanent_fixed_addresses,
61 /*
62 * 256 temporary boot-time mappings, used by early_ioremap(),
63 * before ioremap() is functional.
64 *
65 * We round it up to the next 512 pages boundary so that we
66 * can have a single pgd entry and a single pte table:
67 */
68#define NR_FIX_BTMAPS 64
69#define FIX_BTMAPS_NESTING 4
70 FIX_BTMAP_END = __end_of_permanent_fixed_addresses + 512 -
71 (__end_of_permanent_fixed_addresses & 511),
72 FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS*FIX_BTMAPS_NESTING - 1,
52 __end_of_fixed_addresses 73 __end_of_fixed_addresses
53}; 74};
54 75
55extern void __set_fixmap(enum fixed_addresses idx,
56 unsigned long phys, pgprot_t flags);
57
58#define set_fixmap(idx, phys) \
59 __set_fixmap(idx, phys, PAGE_KERNEL)
60/*
61 * Some hardware wants to get fixmapped without caching.
62 */
63#define set_fixmap_nocache(idx, phys) \
64 __set_fixmap(idx, phys, PAGE_KERNEL_NOCACHE)
65
66#define FIXADDR_TOP (VSYSCALL_END-PAGE_SIZE) 76#define FIXADDR_TOP (VSYSCALL_END-PAGE_SIZE)
67#define FIXADDR_SIZE (__end_of_fixed_addresses << PAGE_SHIFT) 77#define FIXADDR_SIZE (__end_of_fixed_addresses << PAGE_SHIFT)
68#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) 78#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE)
@@ -71,30 +81,4 @@ extern void __set_fixmap(enum fixed_addresses idx,
71#define FIXADDR_USER_START ((unsigned long)VSYSCALL32_VSYSCALL) 81#define FIXADDR_USER_START ((unsigned long)VSYSCALL32_VSYSCALL)
72#define FIXADDR_USER_END (FIXADDR_USER_START + PAGE_SIZE) 82#define FIXADDR_USER_END (FIXADDR_USER_START + PAGE_SIZE)
73 83
74#define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT))
75
76extern void __this_fixmap_does_not_exist(void);
77
78/*
79 * 'index to address' translation. If anyone tries to use the idx
80 * directly without translation, we catch the bug with a NULL-dereference
81 * kernel oops. Illegal ranges of incoming indices are caught too.
82 */
83static __always_inline unsigned long fix_to_virt(const unsigned int idx)
84{
85 /*
86 * this branch gets completely eliminated after inlining,
87 * except when someone tries to use fixaddr indices in an
88 * illegal way. (such as mixing up address types or using
89 * out-of-range indices).
90 *
91 * If it doesn't get removed, the linker will complain
92 * loudly with a reasonably clear error message..
93 */
94 if (idx >= __end_of_fixed_addresses)
95 __this_fixmap_does_not_exist();
96
97 return __fix_to_virt(idx);
98}
99
100#endif 84#endif
diff --git a/include/asm-x86/gart.h b/include/asm-x86/gart.h
index 90958ed993fa..33b9aeeb35a2 100644
--- a/include/asm-x86/gart.h
+++ b/include/asm-x86/gart.h
@@ -1,34 +1,72 @@
1#ifndef _ASM_X8664_IOMMU_H 1#ifndef _ASM_X8664_GART_H
2#define _ASM_X8664_IOMMU_H 1 2#define _ASM_X8664_GART_H 1
3 3
4extern void pci_iommu_shutdown(void); 4#include <asm/e820.h>
5extern void no_iommu_init(void); 5#include <asm/iommu.h>
6extern int force_iommu, no_iommu; 6
7extern int iommu_detected; 7extern void set_up_gart_resume(u32, u32);
8#ifdef CONFIG_GART_IOMMU 8
9extern void gart_iommu_init(void);
10extern void gart_iommu_shutdown(void);
11extern void __init gart_parse_options(char *);
12extern void early_gart_iommu_check(void);
13extern void gart_iommu_hole_init(void);
14extern int fallback_aper_order; 9extern int fallback_aper_order;
15extern int fallback_aper_force; 10extern int fallback_aper_force;
16extern int gart_iommu_aperture;
17extern int gart_iommu_aperture_allowed;
18extern int gart_iommu_aperture_disabled;
19extern int fix_aperture; 11extern int fix_aperture;
20#else
21#define gart_iommu_aperture 0
22#define gart_iommu_aperture_allowed 0
23 12
24static inline void early_gart_iommu_check(void) 13/* PTE bits. */
14#define GPTE_VALID 1
15#define GPTE_COHERENT 2
16
17/* Aperture control register bits. */
18#define GARTEN (1<<0)
19#define DISGARTCPU (1<<4)
20#define DISGARTIO (1<<5)
21
22/* GART cache control register bits. */
23#define INVGART (1<<0)
24#define GARTPTEERR (1<<1)
25
26/* K8 On-cpu GART registers */
27#define AMD64_GARTAPERTURECTL 0x90
28#define AMD64_GARTAPERTUREBASE 0x94
29#define AMD64_GARTTABLEBASE 0x98
30#define AMD64_GARTCACHECTL 0x9c
31#define AMD64_GARTEN (1<<0)
32
33static inline void enable_gart_translation(struct pci_dev *dev, u64 addr)
25{ 34{
35 u32 tmp, ctl;
36
37 /* address of the mappings table */
38 addr >>= 12;
39 tmp = (u32) addr<<4;
40 tmp &= ~0xf;
41 pci_write_config_dword(dev, AMD64_GARTTABLEBASE, tmp);
42
43 /* Enable GART translation for this hammer. */
44 pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &ctl);
45 ctl |= GARTEN;
46 ctl &= ~(DISGARTCPU | DISGARTIO);
47 pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, ctl);
26} 48}
27 49
28static inline void gart_iommu_shutdown(void) 50static inline int aperture_valid(u64 aper_base, u32 aper_size, u32 min_size)
29{ 51{
30} 52 if (!aper_base)
53 return 0;
31 54
32#endif 55 if (aper_base + aper_size > 0x100000000ULL) {
56 printk(KERN_ERR "Aperture beyond 4GB. Ignoring.\n");
57 return 0;
58 }
59 if (e820_any_mapped(aper_base, aper_base + aper_size, E820_RAM)) {
60 printk(KERN_ERR "Aperture pointing to e820 RAM. Ignoring.\n");
61 return 0;
62 }
63 if (aper_size < min_size) {
64 printk(KERN_ERR "Aperture too small (%d MB), minimum %d MB\n",
65 aper_size>>20, min_size>>20);
66 return 0;
67 }
68
69 return 1;
70}
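aperture_valid() rejects a missing base, an aperture crossing 4 GB, overlap with E820 RAM, and undersized windows. A hedged caller sketch with invented values (dev and gart_table_phys are hypothetical):

	if (aperture_valid(1ULL << 30, 64 << 20, 32 << 20))
		enable_gart_translation(dev, gart_table_phys);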
33 71
34#endif 72#endif
diff --git a/include/asm-x86/genapic_32.h b/include/asm-x86/genapic_32.h
index b02ea6e17de8..33a73f5ed222 100644
--- a/include/asm-x86/genapic_32.h
+++ b/include/asm-x86/genapic_32.h
@@ -119,5 +119,10 @@ enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC};
119#define is_uv_system() 0 119#define is_uv_system() 0
120#define uv_wakeup_secondary(a, b) 1 120#define uv_wakeup_secondary(a, b) 1
121 121
122#ifdef CONFIG_X86_IO_APIC
123extern void force_mask_ioapic_irq_2(void);
124#else
125static inline void force_mask_ioapic_irq_2(void) { }
126#endif
122 127
123#endif 128#endif
diff --git a/include/asm-x86/genapic_64.h b/include/asm-x86/genapic_64.h
index 1de931b263ce..647e4e5c2580 100644
--- a/include/asm-x86/genapic_64.h
+++ b/include/asm-x86/genapic_64.h
@@ -44,4 +44,12 @@ DECLARE_PER_CPU(int, x2apic_extra_bits);
44extern void uv_cpu_init(void); 44extern void uv_cpu_init(void);
45extern int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip); 45extern int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip);
46 46
47extern void setup_apic_routing(void);
48
49#ifdef CONFIG_X86_IO_APIC
50extern void force_mask_ioapic_irq_2(void);
51#else
52static inline void force_mask_ioapic_irq_2(void) { }
53#endif
54
47#endif 55#endif
diff --git a/include/asm-x86/hardirq.h b/include/asm-x86/hardirq.h
index 314434d664e7..000787df66e6 100644
--- a/include/asm-x86/hardirq.h
+++ b/include/asm-x86/hardirq.h
@@ -3,3 +3,9 @@
3#else 3#else
4# include "hardirq_64.h" 4# include "hardirq_64.h"
5#endif 5#endif
6
7extern u64 arch_irq_stat_cpu(unsigned int cpu);
8#define arch_irq_stat_cpu arch_irq_stat_cpu
9
10extern u64 arch_irq_stat(void);
11#define arch_irq_stat arch_irq_stat
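Defining arch_irq_stat to its own name is the standard trick for letting generic code probe the override with the preprocessor. The consumer side typically carries a fallback like this (a sketch of the generic pattern, not code from this patch):

	#ifndef arch_irq_stat
	#define arch_irq_stat() 0
	#endif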
diff --git a/include/asm-x86/highmem.h b/include/asm-x86/highmem.h
index e153f3b44774..4514b16cc723 100644
--- a/include/asm-x86/highmem.h
+++ b/include/asm-x86/highmem.h
@@ -74,6 +74,9 @@ struct page *kmap_atomic_to_page(void *ptr);
74 74
75#define flush_cache_kmaps() do { } while (0) 75#define flush_cache_kmaps() do { } while (0)
76 76
77extern void add_highpages_with_active_regions(int nid, unsigned long start_pfn,
78 unsigned long end_pfn);
79
77#endif /* __KERNEL__ */ 80#endif /* __KERNEL__ */
78 81
79#endif /* _ASM_HIGHMEM_H */ 82#endif /* _ASM_HIGHMEM_H */
diff --git a/include/asm-x86/hpet.h b/include/asm-x86/hpet.h
index 6a9b4ac59bf7..82f1ac641bd7 100644
--- a/include/asm-x86/hpet.h
+++ b/include/asm-x86/hpet.h
@@ -86,8 +86,8 @@ extern void hpet_unregister_irq_handler(rtc_irq_handler handler);
86#else /* CONFIG_HPET_TIMER */ 86#else /* CONFIG_HPET_TIMER */
87 87
88static inline int hpet_enable(void) { return 0; } 88static inline int hpet_enable(void) { return 0; }
89static inline unsigned long hpet_readl(unsigned long a) { return 0; }
90static inline int is_hpet_enabled(void) { return 0; } 89static inline int is_hpet_enabled(void) { return 0; }
90#define hpet_readl(a) 0
91 91
92#endif 92#endif
93#endif /* ASM_X86_HPET_H */ 93#endif /* ASM_X86_HPET_H */
diff --git a/include/asm-x86/hw_irq.h b/include/asm-x86/hw_irq.h
index bf025399d939..18f067c310f7 100644
--- a/include/asm-x86/hw_irq.h
+++ b/include/asm-x86/hw_irq.h
@@ -1,5 +1,106 @@
1#ifndef _ASM_HW_IRQ_H
2#define _ASM_HW_IRQ_H
3
4/*
5 * (C) 1992, 1993 Linus Torvalds, (C) 1997 Ingo Molnar
6 *
7 * moved some of the old arch/i386/kernel/irq.h to here. VY
8 *
9 * IRQ/IPI changes taken from work by Thomas Radke
10 * <tomsoft@informatik.tu-chemnitz.de>
11 *
12 * hacked by Andi Kleen for x86-64.
13 * unified by tglx
14 */
15
16#include <asm/irq_vectors.h>
17
18#ifndef __ASSEMBLY__
19
20#include <linux/percpu.h>
21#include <linux/profile.h>
22#include <linux/smp.h>
23
24#include <asm/atomic.h>
25#include <asm/irq.h>
26#include <asm/sections.h>
27
28#define platform_legacy_irq(irq) ((irq) < 16)
29
30/* Interrupt handlers registered during init_IRQ */
31extern void apic_timer_interrupt(void);
32extern void error_interrupt(void);
33extern void spurious_interrupt(void);
34extern void thermal_interrupt(void);
35extern void reschedule_interrupt(void);
36
37extern void invalidate_interrupt(void);
38extern void invalidate_interrupt0(void);
39extern void invalidate_interrupt1(void);
40extern void invalidate_interrupt2(void);
41extern void invalidate_interrupt3(void);
42extern void invalidate_interrupt4(void);
43extern void invalidate_interrupt5(void);
44extern void invalidate_interrupt6(void);
45extern void invalidate_interrupt7(void);
46
47extern void irq_move_cleanup_interrupt(void);
48extern void threshold_interrupt(void);
49
50extern void call_function_interrupt(void);
51
52/* PIC specific functions */
53extern void disable_8259A_irq(unsigned int irq);
54extern void enable_8259A_irq(unsigned int irq);
55extern int i8259A_irq_pending(unsigned int irq);
56extern void make_8259A_irq(unsigned int irq);
57extern void init_8259A(int aeoi);
58
59/* IOAPIC */
60#define IO_APIC_IRQ(x) (((x) >= 16) || ((1<<(x)) & io_apic_irqs))
61extern unsigned long io_apic_irqs;
62
63extern void init_VISWS_APIC_irqs(void);
64extern void setup_IO_APIC(void);
65extern void disable_IO_APIC(void);
66extern void print_IO_APIC(void);
67extern int IO_APIC_get_PCI_irq_vector(int bus, int slot, int fn);
68extern void setup_ioapic_dest(void);
69
70#ifdef CONFIG_X86_64
71extern void enable_IO_APIC(void);
72#endif
73
74/* IPI functions */
75extern void send_IPI_self(int vector);
76extern void send_IPI(int dest, int vector);
77
78/* Statistics */
79extern atomic_t irq_err_count;
80extern atomic_t irq_mis_count;
81
82/* EISA */
83extern void eisa_set_level_irq(unsigned int irq);
84
85/* Voyager functions */
86extern asmlinkage void vic_cpi_interrupt(void);
87extern asmlinkage void vic_sys_interrupt(void);
88extern asmlinkage void vic_cmn_interrupt(void);
89extern asmlinkage void qic_timer_interrupt(void);
90extern asmlinkage void qic_invalidate_interrupt(void);
91extern asmlinkage void qic_reschedule_interrupt(void);
92extern asmlinkage void qic_enable_irq_interrupt(void);
93extern asmlinkage void qic_call_function_interrupt(void);
94
1#ifdef CONFIG_X86_32 95#ifdef CONFIG_X86_32
2# include "hw_irq_32.h" 96extern void (*const interrupt[NR_IRQS])(void);
3#else 97#else
4# include "hw_irq_64.h" 98typedef int vector_irq_t[NR_VECTORS];
99DECLARE_PER_CPU(vector_irq_t, vector_irq);
100extern spinlock_t vector_lock;
101#endif
102extern void setup_vector_irq(int cpu);
103
104#endif /* !__ASSEMBLY__ */
105
5#endif 106#endif
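On 64-bit, vector_irq is the per-cpu table that maps a hardware vector back to its irq number. A simplified resolution sketch, as the interrupt entry path does it:

	int irq = __get_cpu_var(vector_irq)[vector];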
diff --git a/include/asm-x86/hw_irq_32.h b/include/asm-x86/hw_irq_32.h
deleted file mode 100644
index ea88054e03f3..000000000000
--- a/include/asm-x86/hw_irq_32.h
+++ /dev/null
@@ -1,66 +0,0 @@
1#ifndef _ASM_HW_IRQ_H
2#define _ASM_HW_IRQ_H
3
4/*
5 * linux/include/asm/hw_irq.h
6 *
7 * (C) 1992, 1993 Linus Torvalds, (C) 1997 Ingo Molnar
8 *
9 * moved some of the old arch/i386/kernel/irq.h to here. VY
10 *
11 * IRQ/IPI changes taken from work by Thomas Radke
12 * <tomsoft@informatik.tu-chemnitz.de>
13 */
14
15#include <linux/profile.h>
16#include <asm/atomic.h>
17#include <asm/irq.h>
18#include <asm/sections.h>
19
20#define NMI_VECTOR 0x02
21
22/*
23 * Various low-level irq details needed by irq.c, process.c,
24 * time.c, io_apic.c and smp.c
25 *
26 * Interrupt entry/exit code at both C and assembly level
27 */
28
29extern void (*const interrupt[NR_IRQS])(void);
30
31#ifdef CONFIG_SMP
32void reschedule_interrupt(void);
33void invalidate_interrupt(void);
34void call_function_interrupt(void);
35#endif
36
37#ifdef CONFIG_X86_LOCAL_APIC
38void apic_timer_interrupt(void);
39void error_interrupt(void);
40void spurious_interrupt(void);
41void thermal_interrupt(void);
42#define platform_legacy_irq(irq) ((irq) < 16)
43#endif
44
45void disable_8259A_irq(unsigned int irq);
46void enable_8259A_irq(unsigned int irq);
47int i8259A_irq_pending(unsigned int irq);
48void make_8259A_irq(unsigned int irq);
49void init_8259A(int aeoi);
50void send_IPI_self(int vector);
51void init_VISWS_APIC_irqs(void);
52void setup_IO_APIC(void);
53void disable_IO_APIC(void);
54void print_IO_APIC(void);
55int IO_APIC_get_PCI_irq_vector(int bus, int slot, int fn);
56void send_IPI(int dest, int vector);
57void setup_ioapic_dest(void);
58
59extern unsigned long io_apic_irqs;
60
61extern atomic_t irq_err_count;
62extern atomic_t irq_mis_count;
63
64#define IO_APIC_IRQ(x) (((x) >= 16) || ((1<<(x)) & io_apic_irqs))
65
66#endif /* _ASM_HW_IRQ_H */
diff --git a/include/asm-x86/hw_irq_64.h b/include/asm-x86/hw_irq_64.h
deleted file mode 100644
index 0062ef390f67..000000000000
--- a/include/asm-x86/hw_irq_64.h
+++ /dev/null
@@ -1,173 +0,0 @@
1#ifndef _ASM_HW_IRQ_H
2#define _ASM_HW_IRQ_H
3
4/*
5 * linux/include/asm/hw_irq.h
6 *
7 * (C) 1992, 1993 Linus Torvalds, (C) 1997 Ingo Molnar
8 *
9 * moved some of the old arch/i386/kernel/irq.h to here. VY
10 *
11 * IRQ/IPI changes taken from work by Thomas Radke
12 * <tomsoft@informatik.tu-chemnitz.de>
13 *
14 * hacked by Andi Kleen for x86-64.
15 */
16
17#ifndef __ASSEMBLY__
18#include <asm/atomic.h>
19#include <asm/irq.h>
20#include <linux/profile.h>
21#include <linux/smp.h>
22#include <linux/percpu.h>
23#endif
24
25#define NMI_VECTOR 0x02
26/*
27 * IDT vectors usable for external interrupt sources start
28 * at 0x20:
29 */
30#define FIRST_EXTERNAL_VECTOR 0x20
31
32#define IA32_SYSCALL_VECTOR 0x80
33
34
35/* Reserve the lowest usable priority level 0x20 - 0x2f for triggering
36 * cleanup after irq migration.
37 */
38#define IRQ_MOVE_CLEANUP_VECTOR FIRST_EXTERNAL_VECTOR
39
40/*
41 * Vectors 0x30-0x3f are used for ISA interrupts.
42 */
43#define IRQ0_VECTOR (FIRST_EXTERNAL_VECTOR + 0x10)
44#define IRQ1_VECTOR (IRQ0_VECTOR + 1)
45#define IRQ2_VECTOR (IRQ0_VECTOR + 2)
46#define IRQ3_VECTOR (IRQ0_VECTOR + 3)
47#define IRQ4_VECTOR (IRQ0_VECTOR + 4)
48#define IRQ5_VECTOR (IRQ0_VECTOR + 5)
49#define IRQ6_VECTOR (IRQ0_VECTOR + 6)
50#define IRQ7_VECTOR (IRQ0_VECTOR + 7)
51#define IRQ8_VECTOR (IRQ0_VECTOR + 8)
52#define IRQ9_VECTOR (IRQ0_VECTOR + 9)
53#define IRQ10_VECTOR (IRQ0_VECTOR + 10)
54#define IRQ11_VECTOR (IRQ0_VECTOR + 11)
55#define IRQ12_VECTOR (IRQ0_VECTOR + 12)
56#define IRQ13_VECTOR (IRQ0_VECTOR + 13)
57#define IRQ14_VECTOR (IRQ0_VECTOR + 14)
58#define IRQ15_VECTOR (IRQ0_VECTOR + 15)
59
60/*
61 * Special IRQ vectors used by the SMP architecture, 0xf0-0xff
62 *
63 * some of the following vectors are 'rare', they are merged
64 * into a single vector (CALL_FUNCTION_VECTOR) to save vector space.
65 * TLB, reschedule and local APIC vectors are performance-critical.
66 */
67#define SPURIOUS_APIC_VECTOR 0xff
68#define ERROR_APIC_VECTOR 0xfe
69#define RESCHEDULE_VECTOR 0xfd
70#define CALL_FUNCTION_VECTOR 0xfc
71/* fb free - please don't readd KDB here because it's useless
72 (hint - think what a NMI bit does to a vector) */
73#define THERMAL_APIC_VECTOR 0xfa
74#define THRESHOLD_APIC_VECTOR 0xf9
75/* f8 free */
76#define INVALIDATE_TLB_VECTOR_END 0xf7
77#define INVALIDATE_TLB_VECTOR_START 0xf0 /* f0-f7 used for TLB flush */
78
79#define NUM_INVALIDATE_TLB_VECTORS 8
80
81/*
82 * Local APIC timer IRQ vector is on a different priority level,
83 * to work around the 'lost local interrupt if more than 2 IRQ
84 * sources per level' errata.
85 */
86#define LOCAL_TIMER_VECTOR 0xef
87
88/*
89 * First APIC vector available to drivers: (vectors 0x30-0xee)
90 * we start at 0x41 to spread out vectors evenly between priority
91 * levels. (0x80 is the syscall vector)
92 */
93#define FIRST_DEVICE_VECTOR (IRQ15_VECTOR + 2)
94#define FIRST_SYSTEM_VECTOR 0xef /* duplicated in irq.h */
95
96
97#ifndef __ASSEMBLY__
98
99/* Interrupt handlers registered during init_IRQ */
100void apic_timer_interrupt(void);
101void spurious_interrupt(void);
102void error_interrupt(void);
103void reschedule_interrupt(void);
104void call_function_interrupt(void);
105void irq_move_cleanup_interrupt(void);
106void invalidate_interrupt0(void);
107void invalidate_interrupt1(void);
108void invalidate_interrupt2(void);
109void invalidate_interrupt3(void);
110void invalidate_interrupt4(void);
111void invalidate_interrupt5(void);
112void invalidate_interrupt6(void);
113void invalidate_interrupt7(void);
114void thermal_interrupt(void);
115void threshold_interrupt(void);
116void i8254_timer_resume(void);
117
118typedef int vector_irq_t[NR_VECTORS];
119DECLARE_PER_CPU(vector_irq_t, vector_irq);
120extern void __setup_vector_irq(int cpu);
121extern spinlock_t vector_lock;
122
123/*
124 * Various low-level irq details needed by irq.c, process.c,
125 * time.c, io_apic.c and smp.c
126 *
127 * Interrupt entry/exit code at both C and assembly level
128 */
129
130extern void disable_8259A_irq(unsigned int irq);
131extern void enable_8259A_irq(unsigned int irq);
132extern int i8259A_irq_pending(unsigned int irq);
133extern void make_8259A_irq(unsigned int irq);
134extern void init_8259A(int aeoi);
135extern void send_IPI_self(int vector);
136extern void init_VISWS_APIC_irqs(void);
137extern void setup_IO_APIC(void);
138extern void enable_IO_APIC(void);
139extern void disable_IO_APIC(void);
140extern void print_IO_APIC(void);
141extern int IO_APIC_get_PCI_irq_vector(int bus, int slot, int fn);
142extern void send_IPI(int dest, int vector);
143extern void setup_ioapic_dest(void);
144extern void native_init_IRQ(void);
145
146extern unsigned long io_apic_irqs;
147
148extern atomic_t irq_err_count;
149extern atomic_t irq_mis_count;
150
151#define IO_APIC_IRQ(x) (((x) >= 16) || ((1<<(x)) & io_apic_irqs))
152
153#include <asm/ptrace.h>
154
155#define IRQ_NAME2(nr) nr##_interrupt(void)
156#define IRQ_NAME(nr) IRQ_NAME2(IRQ##nr)
157
158/*
159 * SMP has a few special interrupts for IPI messages
160 */
161
162#define BUILD_IRQ(nr) \
163 asmlinkage void IRQ_NAME(nr); \
164 asm("\n.p2align\n" \
165 "IRQ" #nr "_interrupt:\n\t" \
166 "push $~(" #nr ") ; " \
167 "jmp common_interrupt");
168
169#define platform_legacy_irq(irq) ((irq) < 16)
170
171#endif
172
173#endif /* _ASM_HW_IRQ_H */
diff --git a/include/asm-x86/i8259.h b/include/asm-x86/i8259.h
index 45d4df3e51e6..2f98df91f1f2 100644
--- a/include/asm-x86/i8259.h
+++ b/include/asm-x86/i8259.h
@@ -55,4 +55,6 @@ static inline void outb_pic(unsigned char value, unsigned int port)
 	udelay(2);
 }
 
+extern struct irq_chip i8259A_chip;
+
 #endif	/* __ASM_I8259_H__ */
diff --git a/include/asm-x86/io.h b/include/asm-x86/io.h
index d5b11f60dbd0..bf5d629b3a39 100644
--- a/include/asm-x86/io.h
+++ b/include/asm-x86/io.h
@@ -3,6 +3,76 @@
 
 #define ARCH_HAS_IOREMAP_WC
 
6#include <linux/compiler.h>
7
8/*
9 * early_ioremap() and early_iounmap() are for temporary early boot-time
10 * mappings, before the real ioremap() is functional.
11 * A boot-time mapping is currently limited to at most 16 pages.
12 */
13#ifndef __ASSEMBLY__
14extern void early_ioremap_init(void);
15extern void early_ioremap_clear(void);
16extern void early_ioremap_reset(void);
17extern void *early_ioremap(unsigned long offset, unsigned long size);
18extern void early_iounmap(void *addr, unsigned long size);
19extern void __iomem *fix_ioremap(unsigned idx, unsigned long phys);
20#endif
21
22#define build_mmio_read(name, size, type, reg, barrier) \
23static inline type name(const volatile void __iomem *addr) \
24{ type ret; asm volatile("mov" size " %1,%0":"=" reg (ret) \
25:"m" (*(volatile type __force *)addr) barrier); return ret; }
26
27#define build_mmio_write(name, size, type, reg, barrier) \
28static inline void name(type val, volatile void __iomem *addr) \
29{ asm volatile("mov" size " %0,%1": :reg (val), \
30"m" (*(volatile type __force *)addr) barrier); }
31
32build_mmio_read(readb, "b", unsigned char, "q", :"memory")
33build_mmio_read(readw, "w", unsigned short, "r", :"memory")
34build_mmio_read(readl, "l", unsigned int, "r", :"memory")
35
36build_mmio_read(__readb, "b", unsigned char, "q", )
37build_mmio_read(__readw, "w", unsigned short, "r", )
38build_mmio_read(__readl, "l", unsigned int, "r", )
39
40build_mmio_write(writeb, "b", unsigned char, "q", :"memory")
41build_mmio_write(writew, "w", unsigned short, "r", :"memory")
42build_mmio_write(writel, "l", unsigned int, "r", :"memory")
43
44build_mmio_write(__writeb, "b", unsigned char, "q", )
45build_mmio_write(__writew, "w", unsigned short, "r", )
46build_mmio_write(__writel, "l", unsigned int, "r", )
47
48#define readb_relaxed(a) __readb(a)
49#define readw_relaxed(a) __readw(a)
50#define readl_relaxed(a) __readl(a)
51#define __raw_readb __readb
52#define __raw_readw __readw
53#define __raw_readl __readl
54
55#define __raw_writeb __writeb
56#define __raw_writew __writew
57#define __raw_writel __writel
58
59#define mmiowb() barrier()
60
61#ifdef CONFIG_X86_64
62build_mmio_read(readq, "q", unsigned long, "r", :"memory")
63build_mmio_read(__readq, "q", unsigned long, "r", )
64build_mmio_write(writeq, "q", unsigned long, "r", :"memory")
65build_mmio_write(__writeq, "q", unsigned long, "r", )
66
67#define readq_relaxed(a) __readq(a)
68#define __raw_readq __readq
69#define __raw_writeq writeq
70
71/* Let people know we have them */
72#define readq readq
73#define writeq writeq
74#endif
75
 #ifdef CONFIG_X86_32
 # include "io_32.h"
 #else
@@ -16,4 +86,17 @@ extern int ioremap_change_attr(unsigned long vaddr, unsigned long size,
 				unsigned long prot_val);
 extern void __iomem *ioremap_wc(unsigned long offset, unsigned long size);
 
89/*
90 * early_ioremap() and early_iounmap() are for temporary early boot-time
91 * mappings, before the real ioremap() is functional.
92 * A boot-time mapping is currently limited to at most 16 pages.
93 */
94extern void early_ioremap_init(void);
95extern void early_ioremap_clear(void);
96extern void early_ioremap_reset(void);
97extern void *early_ioremap(unsigned long offset, unsigned long size);
98extern void early_iounmap(void *addr, unsigned long size);
99extern void __iomem *fix_ioremap(unsigned idx, unsigned long phys);
100
101
 #endif /* _ASM_X86_IO_H */
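
A note on the build_mmio_read()/build_mmio_write() generators added above: each invocation expands to one small inline accessor. As a sketch (the expansion below is illustrative, not text from the patch), build_mmio_read(readb, "b", unsigned char, "q", :"memory") becomes roughly:

	static inline unsigned char readb(const volatile void __iomem *addr)
	{
		unsigned char ret;
		asm volatile("movb %1,%0" : "=q" (ret)
			     : "m" (*(volatile unsigned char __force *)addr)
			     : "memory");	/* the barrier argument */
		return ret;
	}

The "q" constraint keeps the result in a byte-capable register, and the "memory" clobber is exactly what distinguishes readb() from the relaxed __readb() variant, which passes an empty barrier argument.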
diff --git a/include/asm-x86/io_32.h b/include/asm-x86/io_32.h
index 049e81e797a0..4df44ed54077 100644
--- a/include/asm-x86/io_32.h
+++ b/include/asm-x86/io_32.h
@@ -122,18 +122,6 @@ static inline void __iomem *ioremap(resource_size_t offset, unsigned long size)
 extern void iounmap(volatile void __iomem *addr);
 
 /*
125 * early_ioremap() and early_iounmap() are for temporary early boot-time
126 * mappings, before the real ioremap() is functional.
127 * A boot-time mapping is currently limited to at most 16 pages.
128 */
129extern void early_ioremap_init(void);
130extern void early_ioremap_clear(void);
131extern void early_ioremap_reset(void);
132extern void *early_ioremap(unsigned long offset, unsigned long size);
133extern void early_iounmap(void *addr, unsigned long size);
134extern void __iomem *fix_ioremap(unsigned idx, unsigned long phys);
135
136/*
  * ISA I/O bus memory addresses are 1:1 with the physical address.
  */
 #define isa_virt_to_bus virt_to_phys
@@ -149,55 +137,6 @@ extern void __iomem *fix_ioremap(unsigned idx, unsigned long phys);
 #define virt_to_bus virt_to_phys
 #define bus_to_virt phys_to_virt
 
152/*
153 * readX/writeX() are used to access memory mapped devices. On some
154 * architectures the memory mapped IO stuff needs to be accessed
155 * differently. On the x86 architecture, we just read/write the
156 * memory location directly.
157 */
158
159static inline unsigned char readb(const volatile void __iomem *addr)
160{
161 return *(volatile unsigned char __force *)addr;
162}
163
164static inline unsigned short readw(const volatile void __iomem *addr)
165{
166 return *(volatile unsigned short __force *)addr;
167}
168
169static inline unsigned int readl(const volatile void __iomem *addr)
170{
171 return *(volatile unsigned int __force *) addr;
172}
173
174#define readb_relaxed(addr) readb(addr)
175#define readw_relaxed(addr) readw(addr)
176#define readl_relaxed(addr) readl(addr)
177#define __raw_readb readb
178#define __raw_readw readw
179#define __raw_readl readl
180
181static inline void writeb(unsigned char b, volatile void __iomem *addr)
182{
183 *(volatile unsigned char __force *)addr = b;
184}
185
186static inline void writew(unsigned short b, volatile void __iomem *addr)
187{
188 *(volatile unsigned short __force *)addr = b;
189}
190
191static inline void writel(unsigned int b, volatile void __iomem *addr)
192{
193 *(volatile unsigned int __force *)addr = b;
194}
195#define __raw_writeb writeb
196#define __raw_writew writew
197#define __raw_writel writel
198
199#define mmiowb()
200
 static inline void
 memset_io(volatile void __iomem *addr, unsigned char val, int count)
 {
diff --git a/include/asm-x86/io_64.h b/include/asm-x86/io_64.h
index 0930bedf9e4d..ddd8058a5026 100644
--- a/include/asm-x86/io_64.h
+++ b/include/asm-x86/io_64.h
@@ -204,77 +204,6 @@ extern void __iomem *fix_ioremap(unsigned idx, unsigned long phys);
 #define virt_to_bus virt_to_phys
 #define bus_to_virt phys_to_virt
 
207/*
208 * readX/writeX() are used to access memory mapped devices. On some
209 * architectures the memory mapped IO stuff needs to be accessed
210 * differently. On the x86 architecture, we just read/write the
211 * memory location directly.
212 */
213
214static inline __u8 __readb(const volatile void __iomem *addr)
215{
216 return *(__force volatile __u8 *)addr;
217}
218
219static inline __u16 __readw(const volatile void __iomem *addr)
220{
221 return *(__force volatile __u16 *)addr;
222}
223
224static __always_inline __u32 __readl(const volatile void __iomem *addr)
225{
226 return *(__force volatile __u32 *)addr;
227}
228
229static inline __u64 __readq(const volatile void __iomem *addr)
230{
231 return *(__force volatile __u64 *)addr;
232}
233
234#define readb(x) __readb(x)
235#define readw(x) __readw(x)
236#define readl(x) __readl(x)
237#define readq(x) __readq(x)
238#define readb_relaxed(a) readb(a)
239#define readw_relaxed(a) readw(a)
240#define readl_relaxed(a) readl(a)
241#define readq_relaxed(a) readq(a)
242#define __raw_readb readb
243#define __raw_readw readw
244#define __raw_readl readl
245#define __raw_readq readq
246
247#define mmiowb()
248
249static inline void __writel(__u32 b, volatile void __iomem *addr)
250{
251 *(__force volatile __u32 *)addr = b;
252}
253
254static inline void __writeq(__u64 b, volatile void __iomem *addr)
255{
256 *(__force volatile __u64 *)addr = b;
257}
258
259static inline void __writeb(__u8 b, volatile void __iomem *addr)
260{
261 *(__force volatile __u8 *)addr = b;
262}
263
264static inline void __writew(__u16 b, volatile void __iomem *addr)
265{
266 *(__force volatile __u16 *)addr = b;
267}
268
269#define writeq(val, addr) __writeq((val), (addr))
270#define writel(val, addr) __writel((val), (addr))
271#define writew(val, addr) __writew((val), (addr))
272#define writeb(val, addr) __writeb((val), (addr))
273#define __raw_writeb writeb
274#define __raw_writew writew
275#define __raw_writel writel
276#define __raw_writeq writeq
277
 void __memcpy_fromio(void *, unsigned long, unsigned);
 void __memcpy_toio(unsigned long, const void *, unsigned);
 
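
With the duplicated 32-bit and 64-bit accessors deleted, both builds now share the io.h definitions shown earlier and driver code is unaffected. A self-contained usage sketch (the device address and register layout here are invented for illustration):

	static void demo_mmio(void)
	{
		void __iomem *base = ioremap(0xfebf0000, 0x100);	/* made-up BAR address */
		u32 status;

		if (!base)
			return;
		status = readl(base + 0x04);	/* ordered MMIO read */
		writel(status, base + 0x04);	/* hypothetical write-1-to-clear ack */
		(void)readl(base + 0x04);	/* read back to flush the posted write */
		iounmap(base);
	}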
diff --git a/include/asm-x86/io_apic.h b/include/asm-x86/io_apic.h
index d593e14f0341..14f82bbcb5fd 100644
--- a/include/asm-x86/io_apic.h
+++ b/include/asm-x86/io_apic.h
@@ -11,6 +11,15 @@
  * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar
  */
 
14/* I/O Unit Redirection Table */
15#define IO_APIC_REDIR_VECTOR_MASK 0x000FF
16#define IO_APIC_REDIR_DEST_LOGICAL 0x00800
17#define IO_APIC_REDIR_DEST_PHYSICAL 0x00000
18#define IO_APIC_REDIR_SEND_PENDING (1 << 12)
19#define IO_APIC_REDIR_REMOTE_IRR (1 << 14)
20#define IO_APIC_REDIR_LEVEL_TRIGGER (1 << 15)
21#define IO_APIC_REDIR_MASKED (1 << 16)
22
 /*
  * The structure of the IO-APIC:
  */
@@ -112,21 +121,32 @@ extern int nr_ioapic_registers[MAX_IO_APICS];
 
 #define MP_MAX_IOAPIC_PIN 127
 
-struct mp_ioapic_routing {
-	int apic_id;
-	int gsi_base;
-	int gsi_end;
-	DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1);
+struct mp_config_ioapic {
+	unsigned long mp_apicaddr;
+	unsigned int mp_apicid;
+	unsigned char mp_type;
+	unsigned char mp_apicver;
+	unsigned char mp_flags;
+};
+
+struct mp_config_intsrc {
+	unsigned int mp_dstapic;
+	unsigned char mp_type;
+	unsigned char mp_irqtype;
+	unsigned short mp_irqflag;
+	unsigned char mp_srcbus;
+	unsigned char mp_srcbusirq;
+	unsigned char mp_dstirq;
 };
 
 /* I/O APIC entries */
-extern struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
+extern struct mp_config_ioapic mp_ioapics[MAX_IO_APICS];
 
 /* # of MP IRQ source entries */
 extern int mp_irq_entries;
 
 /* MP IRQ source entries */
-extern struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
+extern struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
 
 /* non-0 if default (table-less) MP configuration */
 extern int mpc_default_type;
@@ -137,6 +157,9 @@ extern int sis_apic_bug;
137/* 1 if "noapic" boot option passed */ 157/* 1 if "noapic" boot option passed */
138extern int skip_ioapic_setup; 158extern int skip_ioapic_setup;
139 159
160/* 1 if the timer IRQ uses the '8259A Virtual Wire' mode */
161extern int timer_through_8259;
162
140static inline void disable_ioapic_setup(void) 163static inline void disable_ioapic_setup(void)
141{ 164{
142 skip_ioapic_setup = 1; 165 skip_ioapic_setup = 1;
@@ -162,6 +185,8 @@ extern void ioapic_init_mappings(void);
 
 #else  /* !CONFIG_X86_IO_APIC */
 #define io_apic_assign_pci_irqs 0
+static const int timer_through_8259 = 0;
+static inline void ioapic_init_mappings(void) { }
 #endif
 
 #endif
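
The IO_APIC_REDIR_* constants introduced above name the bit layout of the low dword of a redirection table entry. A decoding sketch (the raw value would come from the usual I/O APIC register accessors; nothing below is in the patch):

	static void decode_redir_low(unsigned int low)
	{
		unsigned int vector = low & IO_APIC_REDIR_VECTOR_MASK;
		int logical = !!(low & IO_APIC_REDIR_DEST_LOGICAL);
		int level   = !!(low & IO_APIC_REDIR_LEVEL_TRIGGER);
		int masked  = !!(low & IO_APIC_REDIR_MASKED);

		printk(KERN_DEBUG "vec=0x%02x logical=%d level=%d masked=%d\n",
		       vector, logical, level, masked);
	}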
diff --git a/include/asm-x86/iommu.h b/include/asm-x86/iommu.h
index 07862fdd23c0..068c9a40aa5b 100644
--- a/include/asm-x86/iommu.h
+++ b/include/asm-x86/iommu.h
@@ -1,29 +1,34 @@
-#ifndef _ASM_X8664_GART_H
-#define _ASM_X8664_GART_H 1
+#ifndef _ASM_X8664_IOMMU_H
+#define _ASM_X8664_IOMMU_H 1
 
 extern void pci_iommu_shutdown(void);
 extern void no_iommu_init(void);
 extern int force_iommu, no_iommu;
 extern int iommu_detected;
-#ifdef CONFIG_IOMMU
+
+#ifdef CONFIG_GART_IOMMU
+extern int gart_iommu_aperture;
+extern int gart_iommu_aperture_allowed;
+extern int gart_iommu_aperture_disabled;
+
+extern void early_gart_iommu_check(void);
 extern void gart_iommu_init(void);
 extern void gart_iommu_shutdown(void);
 extern void __init gart_parse_options(char *);
-extern void iommu_hole_init(void);
-extern int fallback_aper_order;
-extern int fallback_aper_force;
-extern int iommu_aperture;
-extern int iommu_aperture_allowed;
-extern int iommu_aperture_disabled;
-extern int fix_aperture;
+extern void gart_iommu_hole_init(void);
+
 #else
-#define iommu_aperture 0
-#define iommu_aperture_allowed 0
+#define gart_iommu_aperture 0
+#define gart_iommu_aperture_allowed 0
+#define gart_iommu_aperture_disabled 1
 
-static inline void gart_iommu_shutdown(void)
+static inline void early_gart_iommu_check(void)
 {
 }
 
+static inline void gart_iommu_shutdown(void)
+{
+}
 #endif
 
 #endif
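
The #else branch shows the usual compile-out idiom: with CONFIG_GART_IOMMU disabled, the aperture flags become constants and the early-check hook an empty inline, so callers need no #ifdefs of their own. A hypothetical caller under those definitions:

	static void aperture_check_sketch(void)
	{
		/* folds to nothing when CONFIG_GART_IOMMU is off:
		 * gart_iommu_aperture is the constant 0 there */
		if (gart_iommu_aperture && !gart_iommu_aperture_disabled)
			early_gart_iommu_check();
	}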
diff --git a/include/asm-x86/ipi.h b/include/asm-x86/ipi.h
index ecc80f341f37..196d63c28aa4 100644
--- a/include/asm-x86/ipi.h
+++ b/include/asm-x86/ipi.h
@@ -20,6 +20,7 @@
 
 #include <asm/hw_irq.h>
 #include <asm/apic.h>
+#include <asm/smp.h>
 
 /*
  * the following functions deal with sending IPIs between CPUs.
diff --git a/include/asm-x86/irq.h b/include/asm-x86/irq.h
index 7ba905465a53..1a2925757317 100644
--- a/include/asm-x86/irq.h
+++ b/include/asm-x86/irq.h
@@ -1,5 +1,50 @@
-#ifdef CONFIG_X86_32
-# include "irq_32.h"
+#ifndef _ASM_IRQ_H
+#define _ASM_IRQ_H
+/*
+ * (C) 1992, 1993 Linus Torvalds, (C) 1997 Ingo Molnar
+ *
+ * IRQ/IPI changes taken from work by Thomas Radke
+ * <tomsoft@informatik.tu-chemnitz.de>
+ */
+
+#include <asm/apicdef.h>
+#include <asm/irq_vectors.h>
+
+static inline int irq_canonicalize(int irq)
+{
+	return ((irq == 2) ? 9 : irq);
+}
+
+#ifdef CONFIG_X86_LOCAL_APIC
+# define ARCH_HAS_NMI_WATCHDOG
+#endif
+
+#ifdef CONFIG_4KSTACKS
+  extern void irq_ctx_init(int cpu);
+  extern void irq_ctx_exit(int cpu);
+# define __ARCH_HAS_DO_SOFTIRQ
 #else
-# include "irq_64.h"
+# define irq_ctx_init(cpu) do { } while (0)
+# define irq_ctx_exit(cpu) do { } while (0)
+# ifdef CONFIG_X86_64
+#  define __ARCH_HAS_DO_SOFTIRQ
+# endif
+#endif
+
+#ifdef CONFIG_IRQBALANCE
+extern int irqbalance_disable(char *str);
+#endif
+
+#ifdef CONFIG_HOTPLUG_CPU
+#include <linux/cpumask.h>
+extern void fixup_irqs(cpumask_t map);
 #endif
+
+extern unsigned int do_IRQ(struct pt_regs *regs);
+extern void init_IRQ(void);
+extern void native_init_IRQ(void);
+
+/* Interrupt vector management */
+extern DECLARE_BITMAP(used_vectors, NR_VECTORS);
+
+#endif /* _ASM_IRQ_H */
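
irq_canonicalize() folds away the i8259 cascade: ISA IRQ 2 is consumed by chaining the two PICs, so a device wired there is actually reported on IRQ 9. A trivial usage sketch:

	static void canonicalize_example(void)
	{
		int a = irq_canonicalize(2);	/* == 9: the cascade line is redirected */
		int b = irq_canonicalize(5);	/* == 5: every other line passes through */
		(void)a; (void)b;
	}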
diff --git a/include/asm-x86/irq_32.h b/include/asm-x86/irq_32.h
deleted file mode 100644
index 0b79f3185243..000000000000
--- a/include/asm-x86/irq_32.h
+++ /dev/null
@@ -1,51 +0,0 @@
1#ifndef _ASM_IRQ_H
2#define _ASM_IRQ_H
3
4/*
5 * linux/include/asm/irq.h
6 *
7 * (C) 1992, 1993 Linus Torvalds, (C) 1997 Ingo Molnar
8 *
9 * IRQ/IPI changes taken from work by Thomas Radke
10 * <tomsoft@informatik.tu-chemnitz.de>
11 */
12
13#include <linux/sched.h>
14/* include comes from machine specific directory */
15#include "irq_vectors.h"
16#include <asm/thread_info.h>
17
18static inline int irq_canonicalize(int irq)
19{
20 return ((irq == 2) ? 9 : irq);
21}
22
23#ifdef CONFIG_X86_LOCAL_APIC
24# define ARCH_HAS_NMI_WATCHDOG /* See include/linux/nmi.h */
25#endif
26
27#ifdef CONFIG_4KSTACKS
28 extern void irq_ctx_init(int cpu);
29 extern void irq_ctx_exit(int cpu);
30# define __ARCH_HAS_DO_SOFTIRQ
31#else
32# define irq_ctx_init(cpu) do { } while (0)
33# define irq_ctx_exit(cpu) do { } while (0)
34#endif
35
36#ifdef CONFIG_IRQBALANCE
37extern int irqbalance_disable(char *str);
38#endif
39
40#ifdef CONFIG_HOTPLUG_CPU
41extern void fixup_irqs(cpumask_t map);
42#endif
43
44unsigned int do_IRQ(struct pt_regs *regs);
45void init_IRQ(void);
46void __init native_init_IRQ(void);
47
48/* Interrupt vector management */
49extern DECLARE_BITMAP(used_vectors, NR_VECTORS);
50
51#endif /* _ASM_IRQ_H */
diff --git a/include/asm-x86/irq_64.h b/include/asm-x86/irq_64.h
deleted file mode 100644
index 083d35a62c94..000000000000
--- a/include/asm-x86/irq_64.h
+++ /dev/null
@@ -1,51 +0,0 @@
1#ifndef _ASM_IRQ_H
2#define _ASM_IRQ_H
3
4/*
5 * linux/include/asm/irq.h
6 *
7 * (C) 1992, 1993 Linus Torvalds, (C) 1997 Ingo Molnar
8 *
9 * IRQ/IPI changes taken from work by Thomas Radke
10 * <tomsoft@informatik.tu-chemnitz.de>
11 */
12
13#define TIMER_IRQ 0
14
15/*
16 * 16 8259A IRQ's, 208 potential APIC interrupt sources.
17 * Right now the APIC is mostly only used for SMP.
18 * 256 vectors is an architectural limit. (we can have
19 * more than 256 devices theoretically, but they will
20 * have to use shared interrupts)
21 * Since vectors 0x00-0x1f are used/reserved for the CPU,
22 * the usable vector space is 0x20-0xff (224 vectors)
23 */
24
25/*
26 * The maximum number of vectors supported by x86_64 processors
27 * is limited to 256. For processors other than x86_64, NR_VECTORS
28 * should be changed accordingly.
29 */
30#define NR_VECTORS 256
31
32#define FIRST_SYSTEM_VECTOR 0xef /* duplicated in hw_irq.h */
33
34#define NR_IRQS (NR_VECTORS + (32 * NR_CPUS))
35#define NR_IRQ_VECTORS NR_IRQS
36
37static inline int irq_canonicalize(int irq)
38{
39 return ((irq == 2) ? 9 : irq);
40}
41
42#define ARCH_HAS_NMI_WATCHDOG /* See include/linux/nmi.h */
43
44#ifdef CONFIG_HOTPLUG_CPU
45#include <linux/cpumask.h>
46extern void fixup_irqs(cpumask_t map);
47#endif
48
49#define __ARCH_HAS_DO_SOFTIRQ 1
50
51#endif /* _ASM_IRQ_H */
diff --git a/include/asm-x86/irq_vectors.h b/include/asm-x86/irq_vectors.h
new file mode 100644
index 000000000000..0ac864ef3cd4
--- /dev/null
+++ b/include/asm-x86/irq_vectors.h
@@ -0,0 +1,169 @@
1#ifndef _ASM_IRQ_VECTORS_H
2#define _ASM_IRQ_VECTORS_H
3
4#include <linux/threads.h>
5
6#define NMI_VECTOR 0x02
7
8/*
9 * IDT vectors usable for external interrupt sources start
10 * at 0x20:
11 */
12#define FIRST_EXTERNAL_VECTOR 0x20
13
14#ifdef CONFIG_X86_32
15# define SYSCALL_VECTOR 0x80
16#else
17# define IA32_SYSCALL_VECTOR 0x80
18#endif
19
20/*
21 * Reserve the lowest usable priority level 0x20 - 0x2f for triggering
22 * cleanup after irq migration on 64 bit.
23 */
24#define IRQ_MOVE_CLEANUP_VECTOR FIRST_EXTERNAL_VECTOR
25
26/*
27 * Vectors 0x20-0x2f are used for ISA interrupts on 32 bit.
28 * Vectors 0x30-0x3f are used for ISA interrupts on 64 bit.
29 */
30#ifdef CONFIG_X86_32
31#define IRQ0_VECTOR (FIRST_EXTERNAL_VECTOR)
32#else
33#define IRQ0_VECTOR (FIRST_EXTERNAL_VECTOR + 0x10)
34#endif
35#define IRQ1_VECTOR (IRQ0_VECTOR + 1)
36#define IRQ2_VECTOR (IRQ0_VECTOR + 2)
37#define IRQ3_VECTOR (IRQ0_VECTOR + 3)
38#define IRQ4_VECTOR (IRQ0_VECTOR + 4)
39#define IRQ5_VECTOR (IRQ0_VECTOR + 5)
40#define IRQ6_VECTOR (IRQ0_VECTOR + 6)
41#define IRQ7_VECTOR (IRQ0_VECTOR + 7)
42#define IRQ8_VECTOR (IRQ0_VECTOR + 8)
43#define IRQ9_VECTOR (IRQ0_VECTOR + 9)
44#define IRQ10_VECTOR (IRQ0_VECTOR + 10)
45#define IRQ11_VECTOR (IRQ0_VECTOR + 11)
46#define IRQ12_VECTOR (IRQ0_VECTOR + 12)
47#define IRQ13_VECTOR (IRQ0_VECTOR + 13)
48#define IRQ14_VECTOR (IRQ0_VECTOR + 14)
49#define IRQ15_VECTOR (IRQ0_VECTOR + 15)
50
51/*
52 * Special IRQ vectors used by the SMP architecture, 0xf0-0xff
53 *
54 * some of the following vectors are 'rare', they are merged
55 * into a single vector (CALL_FUNCTION_VECTOR) to save vector space.
56 * TLB, reschedule and local APIC vectors are performance-critical.
57 *
58 * Vectors 0xf0-0xfa are free (reserved for future Linux use).
59 */
60#ifdef CONFIG_X86_32
61
62# define SPURIOUS_APIC_VECTOR 0xff
63# define ERROR_APIC_VECTOR 0xfe
64# define INVALIDATE_TLB_VECTOR 0xfd
65# define RESCHEDULE_VECTOR 0xfc
66# define CALL_FUNCTION_VECTOR 0xfb
67# define THERMAL_APIC_VECTOR 0xf0
68
69#else
70
71#define SPURIOUS_APIC_VECTOR 0xff
72#define ERROR_APIC_VECTOR 0xfe
73#define RESCHEDULE_VECTOR 0xfd
74#define CALL_FUNCTION_VECTOR 0xfc
75#define THERMAL_APIC_VECTOR 0xfa
76#define THRESHOLD_APIC_VECTOR 0xf9
77#define INVALIDATE_TLB_VECTOR_END 0xf7
78#define INVALIDATE_TLB_VECTOR_START 0xf0 /* f0-f7 used for TLB flush */
79
80#define NUM_INVALIDATE_TLB_VECTORS 8
81
82#endif
83
84/*
85 * Local APIC timer IRQ vector is on a different priority level,
86 * to work around the 'lost local interrupt if more than 2 IRQ
87 * sources per level' errata.
88 */
89#define LOCAL_TIMER_VECTOR 0xef
90
91/*
92 * First APIC vector available to drivers: (vectors 0x30-0xee) we
93 * start at 0x31(0x41) to spread out vectors evenly between priority
94 * levels. (0x80 is the syscall vector)
95 */
96#ifdef CONFIG_X86_32
97# define FIRST_DEVICE_VECTOR 0x31
98#else
99# define FIRST_DEVICE_VECTOR (IRQ15_VECTOR + 2)
100#endif
101
102#define NR_VECTORS 256
103
104#define FPU_IRQ 13
105
106#define FIRST_VM86_IRQ 3
107#define LAST_VM86_IRQ 15
108#define invalid_vm86_irq(irq) ((irq) < 3 || (irq) > 15)
109
110#if !defined(CONFIG_X86_VOYAGER)
111
112# if defined(CONFIG_X86_IO_APIC) || defined(CONFIG_PARAVIRT) || defined(CONFIG_X86_VISWS)
113
114# define NR_IRQS 224
115
116# if (224 >= 32 * NR_CPUS)
117# define NR_IRQ_VECTORS NR_IRQS
118# else
119# define NR_IRQ_VECTORS (32 * NR_CPUS)
120# endif
121
122# else /* IO_APIC || PARAVIRT */
123
124# define NR_IRQS 16
125# define NR_IRQ_VECTORS NR_IRQS
126
127# endif
128
129#else /* !VISWS && !VOYAGER */
130
131# define NR_IRQS 224
132# define NR_IRQ_VECTORS NR_IRQS
133
134#endif /* VISWS */
135
136/* Voyager specific defines */
137/* These define the CPIs we use in linux */
138#define VIC_CPI_LEVEL0 0
139#define VIC_CPI_LEVEL1 1
140/* now the fake CPIs */
141#define VIC_TIMER_CPI 2
142#define VIC_INVALIDATE_CPI 3
143#define VIC_RESCHEDULE_CPI 4
144#define VIC_ENABLE_IRQ_CPI 5
145#define VIC_CALL_FUNCTION_CPI 6
146
147/* Now the QIC CPIs: Since we don't need the two initial levels,
148 * these are 2 less than the VIC CPIs */
149#define QIC_CPI_OFFSET 1
150#define QIC_TIMER_CPI (VIC_TIMER_CPI - QIC_CPI_OFFSET)
151#define QIC_INVALIDATE_CPI (VIC_INVALIDATE_CPI - QIC_CPI_OFFSET)
152#define QIC_RESCHEDULE_CPI (VIC_RESCHEDULE_CPI - QIC_CPI_OFFSET)
153#define QIC_ENABLE_IRQ_CPI (VIC_ENABLE_IRQ_CPI - QIC_CPI_OFFSET)
154#define QIC_CALL_FUNCTION_CPI (VIC_CALL_FUNCTION_CPI - QIC_CPI_OFFSET)
155
156#define VIC_START_FAKE_CPI VIC_TIMER_CPI
157#define VIC_END_FAKE_CPI VIC_CALL_FUNCTION_CPI
158
159/* this is the SYS_INT CPI. */
160#define VIC_SYS_INT 8
161#define VIC_CMN_INT 15
162
163/* This is the boot CPI for alternate processors. It gets overwritten
164 * by the above once the system has activated all available processors */
165#define VIC_CPU_BOOT_CPI VIC_CPI_LEVEL0
166#define VIC_CPU_BOOT_ERRATA_CPI (VIC_CPI_LEVEL0 + 8)
167
168
169#endif /* _ASM_IRQ_VECTORS_H */
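
To make the arithmetic in the unified header concrete: on 64-bit, IRQ0_VECTOR is FIRST_EXTERNAL_VECTOR + 0x10 = 0x30, the sixteen ISA vectors occupy 0x30-0x3f, and FIRST_DEVICE_VECTOR lands at IRQ15_VECTOR + 2 = 0x41. A hypothetical compile-time sanity sketch, not part of the patch:

	static inline void irq_vectors_layout_check(void)
	{
	#ifndef CONFIG_X86_32
		BUILD_BUG_ON(IRQ0_VECTOR != 0x30);		/* FIRST_EXTERNAL_VECTOR + 0x10 */
		BUILD_BUG_ON(IRQ15_VECTOR != 0x3f);
		BUILD_BUG_ON(FIRST_DEVICE_VECTOR != 0x41);	/* IRQ15_VECTOR + 2 */
	#endif
	}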
diff --git a/include/asm-x86/irqflags.h b/include/asm-x86/irqflags.h
index c242527f970e..17e7a1701c97 100644
--- a/include/asm-x86/irqflags.h
+++ b/include/asm-x86/irqflags.h
@@ -111,14 +111,35 @@ static inline unsigned long __raw_local_irq_save(void)
 #define DISABLE_INTERRUPTS(x)	cli
 
 #ifdef CONFIG_X86_64
+#define SWAPGS	swapgs
+/*
+ * Currently paravirt can't handle swapgs nicely when we
+ * don't have a stack we can rely on (such as a user space
+ * stack). So we either find a way around these or just fault
+ * and emulate if a guest tries to call swapgs directly.
+ *
+ * Either way, this is a good way to document that we don't
+ * have a reliable stack. x86_64 only.
+ */
+#define SWAPGS_UNSAFE_STACK	swapgs
+
+#define PARAVIRT_ADJUST_EXCEPTION_FRAME	/*  */
+
 #define INTERRUPT_RETURN	iretq
-#define ENABLE_INTERRUPTS_SYSCALL_RET			\
-			movq	%gs:pda_oldrsp, %rsp;	\
-			swapgs;				\
-			sysretq;
+#define USERGS_SYSRET64				\
+	swapgs;					\
+	sysretq;
+#define USERGS_SYSRET32				\
+	swapgs;					\
+	sysretl
+#define ENABLE_INTERRUPTS_SYSEXIT32		\
+	swapgs;					\
+	sti;					\
+	sysexit
+
 #else
 #define INTERRUPT_RETURN		iret
-#define ENABLE_INTERRUPTS_SYSCALL_RET	sti; sysexit
+#define ENABLE_INTERRUPTS_SYSEXIT	sti; sysexit
 #define GET_CR0_INTO_EAX		movl %cr0, %eax
 #endif
 
@@ -169,16 +190,6 @@ static inline void trace_hardirqs_fixup(void)
 #else
 
 #ifdef CONFIG_X86_64
-/*
- * Currently paravirt can't handle swapgs nicely when we
- * don't have a stack we can rely on (such as a user space
- * stack). So we either find a way around these or just fault
- * and emulate if a guest tries to call swapgs directly.
- *
- * Either way, this is a good way to document that we don't
- * have a reliable stack. x86_64 only.
- */
-#define SWAPGS_UNSAFE_STACK	swapgs
 #define ARCH_TRACE_IRQS_ON		call trace_hardirqs_on_thunk
 #define ARCH_TRACE_IRQS_OFF		call trace_hardirqs_off_thunk
 #define ARCH_LOCKDEP_SYS_EXIT		call lockdep_sys_exit_thunk
diff --git a/include/asm-x86/mach-bigsmp/mach_apic.h b/include/asm-x86/mach-bigsmp/mach_apic.h
index 8327907c79bf..017c8c19ad8f 100644
--- a/include/asm-x86/mach-bigsmp/mach_apic.h
+++ b/include/asm-x86/mach-bigsmp/mach_apic.h
@@ -81,7 +81,7 @@ static inline int multi_timer_check(int apic, int irq)
 
 static inline int apicid_to_node(int logical_apicid)
 {
-	return (0);
+	return apicid_2_node[hard_smp_processor_id()];
 }
 
 static inline int cpu_present_to_apicid(int mps_cpu)
diff --git a/include/asm-x86/mach-bigsmp/mach_mpspec.h b/include/asm-x86/mach-bigsmp/mach_mpspec.h
deleted file mode 100644
index 6b5dadcf1d0e..000000000000
--- a/include/asm-x86/mach-bigsmp/mach_mpspec.h
+++ /dev/null
@@ -1,8 +0,0 @@
1#ifndef __ASM_MACH_MPSPEC_H
2#define __ASM_MACH_MPSPEC_H
3
4#define MAX_IRQ_SOURCES 256
5
6#define MAX_MP_BUSSES 32
7
8#endif /* __ASM_MACH_MPSPEC_H */
diff --git a/include/asm-x86/mach-default/irq_vectors.h b/include/asm-x86/mach-default/irq_vectors.h
deleted file mode 100644
index 881c63ca61ad..000000000000
--- a/include/asm-x86/mach-default/irq_vectors.h
+++ /dev/null
@@ -1,96 +0,0 @@
1/*
2 * This file should contain #defines for all of the interrupt vector
3 * numbers used by this architecture.
4 *
5 * In addition, there are some standard defines:
6 *
7 * FIRST_EXTERNAL_VECTOR:
8 * The first free place for external interrupts
9 *
10 * SYSCALL_VECTOR:
11 * The IRQ vector a syscall makes the user to kernel transition
12 * under.
13 *
14 * TIMER_IRQ:
15 * The IRQ number the timer interrupt comes in at.
16 *
17 * NR_IRQS:
18 * The total number of interrupt vectors (including all the
19 * architecture specific interrupts) needed.
20 *
21 */
22#ifndef _ASM_IRQ_VECTORS_H
23#define _ASM_IRQ_VECTORS_H
24
25/*
26 * IDT vectors usable for external interrupt sources start
27 * at 0x20:
28 */
29#define FIRST_EXTERNAL_VECTOR 0x20
30
31#define SYSCALL_VECTOR 0x80
32
33/*
34 * Vectors 0x20-0x2f are used for ISA interrupts.
35 */
36
37/*
38 * Special IRQ vectors used by the SMP architecture, 0xf0-0xff
39 *
40 * some of the following vectors are 'rare', they are merged
41 * into a single vector (CALL_FUNCTION_VECTOR) to save vector space.
42 * TLB, reschedule and local APIC vectors are performance-critical.
43 *
44 * Vectors 0xf0-0xfa are free (reserved for future Linux use).
45 */
46#define SPURIOUS_APIC_VECTOR 0xff
47#define ERROR_APIC_VECTOR 0xfe
48#define INVALIDATE_TLB_VECTOR 0xfd
49#define RESCHEDULE_VECTOR 0xfc
50#define CALL_FUNCTION_VECTOR 0xfb
51
52#define THERMAL_APIC_VECTOR 0xf0
53/*
54 * Local APIC timer IRQ vector is on a different priority level,
55 * to work around the 'lost local interrupt if more than 2 IRQ
56 * sources per level' errata.
57 */
58#define LOCAL_TIMER_VECTOR 0xef
59
60/*
61 * First APIC vector available to drivers: (vectors 0x30-0xee)
62 * we start at 0x31 to spread out vectors evenly between priority
63 * levels. (0x80 is the syscall vector)
64 */
65#define FIRST_DEVICE_VECTOR 0x31
66#define FIRST_SYSTEM_VECTOR 0xef
67
68#define TIMER_IRQ 0
69
70/*
71 * 16 8259A IRQ's, 208 potential APIC interrupt sources.
72 * Right now the APIC is mostly only used for SMP.
73 * 256 vectors is an architectural limit. (we can have
74 * more than 256 devices theoretically, but they will
75 * have to use shared interrupts)
76 * Since vectors 0x00-0x1f are used/reserved for the CPU,
77 * the usable vector space is 0x20-0xff (224 vectors)
78 */
79
80/*
81 * The maximum number of vectors supported by i386 processors
82 * is limited to 256. For processors other than i386, NR_VECTORS
83 * should be changed accordingly.
84 */
85#define NR_VECTORS 256
86
87#include "irq_vectors_limits.h"
88
89#define FPU_IRQ 13
90
91#define FIRST_VM86_IRQ 3
92#define LAST_VM86_IRQ 15
93#define invalid_vm86_irq(irq) ((irq) < 3 || (irq) > 15)
94
95
96#endif /* _ASM_IRQ_VECTORS_H */
diff --git a/include/asm-x86/mach-default/irq_vectors_limits.h b/include/asm-x86/mach-default/irq_vectors_limits.h
deleted file mode 100644
index a90c7a60109f..000000000000
--- a/include/asm-x86/mach-default/irq_vectors_limits.h
+++ /dev/null
@@ -1,16 +0,0 @@
1#ifndef _ASM_IRQ_VECTORS_LIMITS_H
2#define _ASM_IRQ_VECTORS_LIMITS_H
3
4#if defined(CONFIG_X86_IO_APIC) || defined(CONFIG_PARAVIRT)
5#define NR_IRQS 224
6# if (224 >= 32 * NR_CPUS)
7# define NR_IRQ_VECTORS NR_IRQS
8# else
9# define NR_IRQ_VECTORS (32 * NR_CPUS)
10# endif
11#else
12#define NR_IRQS 16
13#define NR_IRQ_VECTORS NR_IRQS
14#endif
15
16#endif /* _ASM_IRQ_VECTORS_LIMITS_H */
diff --git a/include/asm-x86/mach-default/mach_apic.h b/include/asm-x86/mach-default/mach_apic.h
index 21003b56ae95..0b2cde5e1b74 100644
--- a/include/asm-x86/mach-default/mach_apic.h
+++ b/include/asm-x86/mach-default/mach_apic.h
@@ -77,7 +77,11 @@ static inline void setup_apic_routing(void)
 
 static inline int apicid_to_node(int logical_apicid)
 {
+#ifdef CONFIG_SMP
+	return apicid_2_node[hard_smp_processor_id()];
+#else
 	return 0;
+#endif
 }
 #endif
 
diff --git a/include/asm-x86/mach-default/setup_arch.h b/include/asm-x86/mach-default/setup_arch.h
index 605e3ccb991b..38846208b548 100644
--- a/include/asm-x86/mach-default/setup_arch.h
+++ b/include/asm-x86/mach-default/setup_arch.h
@@ -1,7 +1,3 @@
 /* Hook to call BIOS initialisation function */
 
 /* no action for generic */
-
-#ifndef ARCH_SETUP
-#define ARCH_SETUP
-#endif
diff --git a/include/asm-x86/mach-default/smpboot_hooks.h b/include/asm-x86/mach-default/smpboot_hooks.h
index 56d0e1fa0258..56d001b9dce4 100644
--- a/include/asm-x86/mach-default/smpboot_hooks.h
+++ b/include/asm-x86/mach-default/smpboot_hooks.h
@@ -3,7 +3,9 @@
 
 static inline void smpboot_clear_io_apic_irqs(void)
 {
+#ifdef CONFIG_X86_IO_APIC
 	io_apic_irqs = 0;
+#endif
 }
 
 static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip)
@@ -35,17 +37,23 @@ static inline void smpboot_restore_warm_reset_vector(void)
 
 static inline void __init smpboot_setup_io_apic(void)
 {
+#ifdef CONFIG_X86_IO_APIC
 	/*
 	 * Here we can be sure that there is an IO-APIC in the system. Let's
 	 * go and set it up:
 	 */
 	if (!skip_ioapic_setup && nr_ioapics)
 		setup_IO_APIC();
-	else
+	else {
 		nr_ioapics = 0;
+		localise_nmi_watchdog();
+	}
+#endif
 }
 
 static inline void smpboot_clear_io_apic(void)
 {
+#ifdef CONFIG_X86_IO_APIC
 	nr_ioapics = 0;
+#endif
 }
diff --git a/include/asm-x86/mach-es7000/mach_mpspec.h b/include/asm-x86/mach-es7000/mach_mpspec.h
deleted file mode 100644
index b1f5039d4506..000000000000
--- a/include/asm-x86/mach-es7000/mach_mpspec.h
+++ /dev/null
@@ -1,8 +0,0 @@
1#ifndef __ASM_MACH_MPSPEC_H
2#define __ASM_MACH_MPSPEC_H
3
4#define MAX_IRQ_SOURCES 256
5
6#define MAX_MP_BUSSES 256
7
8#endif /* __ASM_MACH_MPSPEC_H */
diff --git a/include/asm-x86/mach-generic/mach_mpparse.h b/include/asm-x86/mach-generic/mach_mpparse.h
index 0d0b5ba2e9d1..586cadbf3787 100644
--- a/include/asm-x86/mach-generic/mach_mpparse.h
+++ b/include/asm-x86/mach-generic/mach_mpparse.h
@@ -1,7 +1,10 @@
 #ifndef _MACH_MPPARSE_H
 #define _MACH_MPPARSE_H 1
 
-int mps_oem_check(struct mp_config_table *mpc, char *oem, char *productid);
-int acpi_madt_oem_check(char *oem_id, char *oem_table_id);
+
+extern int mps_oem_check(struct mp_config_table *mpc, char *oem,
+			 char *productid);
+
+extern int acpi_madt_oem_check(char *oem_id, char *oem_table_id);
 
 #endif
diff --git a/include/asm-x86/mach-numaq/mach_apic.h b/include/asm-x86/mach-numaq/mach_apic.h
index 75a56e5afbe7..d802465e026a 100644
--- a/include/asm-x86/mach-numaq/mach_apic.h
+++ b/include/asm-x86/mach-numaq/mach_apic.h
@@ -20,8 +20,14 @@ static inline cpumask_t target_cpus(void)
 #define INT_DELIVERY_MODE dest_LowestPrio
 #define INT_DEST_MODE 0     /* physical delivery on LOCAL quad */
 
-#define check_apicid_used(bitmap, apicid) physid_isset(apicid, bitmap)
-#define check_apicid_present(bit) physid_isset(bit, phys_cpu_present_map)
+static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid)
+{
+	return physid_isset(apicid, bitmap);
+}
+static inline unsigned long check_apicid_present(int bit)
+{
+	return physid_isset(bit, phys_cpu_present_map);
+}
 #define apicid_cluster(apicid) (apicid & 0xF0)
 
 static inline int apic_id_registered(void)
@@ -77,11 +83,6 @@ static inline int cpu_present_to_apicid(int mps_cpu)
 		return BAD_APICID;
 }
 
-static inline int generate_logical_apicid(int quad, int phys_apicid)
-{
-	return (quad << 4) + (phys_apicid ? phys_apicid << 1 : 1);
-}
-
 static inline int apicid_to_node(int logical_apicid)
 {
 	return logical_apicid >> 4;
@@ -95,30 +96,6 @@ static inline physid_mask_t apicid_to_cpu_present(int logical_apicid)
 	return physid_mask_of_physid(cpu + 4*node);
 }
 
98struct mpc_config_translation {
99 unsigned char mpc_type;
100 unsigned char trans_len;
101 unsigned char trans_type;
102 unsigned char trans_quad;
103 unsigned char trans_global;
104 unsigned char trans_local;
105 unsigned short trans_reserved;
106};
107
108static inline int mpc_apic_id(struct mpc_config_processor *m,
109 struct mpc_config_translation *translation_record)
110{
111 int quad = translation_record->trans_quad;
112 int logical_apicid = generate_logical_apicid(quad, m->mpc_apicid);
113
114 printk("Processor #%d %u:%u APIC version %d (quad %d, apic %d)\n",
115 m->mpc_apicid,
116 (m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8,
117 (m->mpc_cpufeature & CPU_MODEL_MASK) >> 4,
118 m->mpc_apicver, quad, logical_apicid);
119 return logical_apicid;
120}
121
 extern void *xquad_portio;
 
 static inline void setup_portio_remap(void)
diff --git a/include/asm-x86/mach-numaq/mach_mpparse.h b/include/asm-x86/mach-numaq/mach_mpparse.h
index 459b12401187..626aef6b155f 100644
--- a/include/asm-x86/mach-numaq/mach_mpparse.h
+++ b/include/asm-x86/mach-numaq/mach_mpparse.h
@@ -1,14 +1,7 @@
 #ifndef __ASM_MACH_MPPARSE_H
 #define __ASM_MACH_MPPARSE_H
 
-extern void mpc_oem_bus_info(struct mpc_config_bus *m, char *name,
-	struct mpc_config_translation *translation);
-extern void mpc_oem_pci_bus(struct mpc_config_bus *m,
-	struct mpc_config_translation *translation);
-
-/* Hook from generic ACPI tables.c */
-static inline void acpi_madt_oem_check(char *oem_id, char *oem_table_id)
-{
-}
+extern void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem,
+				char *productid);
 
 #endif /* __ASM_MACH_MPPARSE_H */
diff --git a/include/asm-x86/mach-numaq/mach_mpspec.h b/include/asm-x86/mach-numaq/mach_mpspec.h
deleted file mode 100644
index dffb09856f8f..000000000000
--- a/include/asm-x86/mach-numaq/mach_mpspec.h
+++ /dev/null
@@ -1,8 +0,0 @@
1#ifndef __ASM_MACH_MPSPEC_H
2#define __ASM_MACH_MPSPEC_H
3
4#define MAX_IRQ_SOURCES 512
5
6#define MAX_MP_BUSSES 32
7
8#endif /* __ASM_MACH_MPSPEC_H */
diff --git a/include/asm-x86/mach-summit/mach_mpspec.h b/include/asm-x86/mach-summit/mach_mpspec.h
deleted file mode 100644
index bd765523511a..000000000000
--- a/include/asm-x86/mach-summit/mach_mpspec.h
+++ /dev/null
@@ -1,9 +0,0 @@
1#ifndef __ASM_MACH_MPSPEC_H
2#define __ASM_MACH_MPSPEC_H
3
4#define MAX_IRQ_SOURCES 256
5
6/* Maximum 256 PCI busses, plus 1 ISA bus in each of 4 cabinets. */
7#define MAX_MP_BUSSES 260
8
9#endif /* __ASM_MACH_MPSPEC_H */
diff --git a/include/asm-x86/mach-visws/irq_vectors.h b/include/asm-x86/mach-visws/irq_vectors.h
deleted file mode 100644
index cb572d8db505..000000000000
--- a/include/asm-x86/mach-visws/irq_vectors.h
+++ /dev/null
@@ -1,62 +0,0 @@
1#ifndef _ASM_IRQ_VECTORS_H
2#define _ASM_IRQ_VECTORS_H
3
4/*
5 * IDT vectors usable for external interrupt sources start
6 * at 0x20:
7 */
8#define FIRST_EXTERNAL_VECTOR 0x20
9
10#define SYSCALL_VECTOR 0x80
11
12/*
13 * Vectors 0x20-0x2f are used for ISA interrupts.
14 */
15
16/*
17 * Special IRQ vectors used by the SMP architecture, 0xf0-0xff
18 *
19 * some of the following vectors are 'rare', they are merged
20 * into a single vector (CALL_FUNCTION_VECTOR) to save vector space.
21 * TLB, reschedule and local APIC vectors are performance-critical.
22 *
23 * Vectors 0xf0-0xfa are free (reserved for future Linux use).
24 */
25#define SPURIOUS_APIC_VECTOR 0xff
26#define ERROR_APIC_VECTOR 0xfe
27#define INVALIDATE_TLB_VECTOR 0xfd
28#define RESCHEDULE_VECTOR 0xfc
29#define CALL_FUNCTION_VECTOR 0xfb
30
31#define THERMAL_APIC_VECTOR 0xf0
32/*
33 * Local APIC timer IRQ vector is on a different priority level,
34 * to work around the 'lost local interrupt if more than 2 IRQ
35 * sources per level' errata.
36 */
37#define LOCAL_TIMER_VECTOR 0xef
38
39/*
40 * First APIC vector available to drivers: (vectors 0x30-0xee)
41 * we start at 0x31 to spread out vectors evenly between priority
42 * levels. (0x80 is the syscall vector)
43 */
44#define FIRST_DEVICE_VECTOR 0x31
45#define FIRST_SYSTEM_VECTOR 0xef
46
47#define TIMER_IRQ 0
48
49/*
50 * IRQ definitions
51 */
52#define NR_VECTORS 256
53#define NR_IRQS 224
54#define NR_IRQ_VECTORS NR_IRQS
55
56#define FPU_IRQ 13
57
58#define FIRST_VM86_IRQ 3
59#define LAST_VM86_IRQ 15
60#define invalid_vm86_irq(irq) ((irq) < 3 || (irq) > 15)
61
62#endif /* _ASM_IRQ_VECTORS_H */
diff --git a/include/asm-x86/mach-visws/mach_apic.h b/include/asm-x86/mach-visws/mach_apic.h
index a9ef33a8a995..6943e7a1d0e6 100644
--- a/include/asm-x86/mach-visws/mach_apic.h
+++ b/include/asm-x86/mach-visws/mach_apic.h
@@ -1,103 +1 @@
-#ifndef __ASM_MACH_APIC_H
-#define __ASM_MACH_APIC_H
+#include "../mach-default/mach_apic.h"
3
4#include <mach_apicdef.h>
5#include <asm/smp.h>
6
7#define APIC_DFR_VALUE (APIC_DFR_FLAT)
8
9#define no_balance_irq (0)
10#define esr_disable (0)
11
12#define INT_DELIVERY_MODE dest_LowestPrio
13#define INT_DEST_MODE 1 /* logical delivery broadcast to all procs */
14
15#ifdef CONFIG_SMP
16 #define TARGET_CPUS cpu_online_map
17#else
18 #define TARGET_CPUS cpumask_of_cpu(0)
19#endif
20
21#define check_apicid_used(bitmap, apicid) physid_isset(apicid, bitmap)
22#define check_apicid_present(bit) physid_isset(bit, phys_cpu_present_map)
23
24static inline int apic_id_registered(void)
25{
26 return physid_isset(GET_APIC_ID(read_apic_id()), phys_cpu_present_map);
27}
28
29/*
30 * Set up the logical destination ID.
31 *
32 * Intel recommends to set DFR, LDR and TPR before enabling
33 * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel
34 * document number 292116). So here it goes...
35 */
36static inline void init_apic_ldr(void)
37{
38 unsigned long val;
39
40 apic_write_around(APIC_DFR, APIC_DFR_VALUE);
41 val = apic_read(APIC_LDR) & ~APIC_LDR_MASK;
42 val |= SET_APIC_LOGICAL_ID(1UL << smp_processor_id());
43 apic_write_around(APIC_LDR, val);
44}
45
46static inline void summit_check(char *oem, char *productid)
47{
48}
49
50static inline void setup_apic_routing(void)
51{
52}
53
54static inline int apicid_to_node(int logical_apicid)
55{
56 return 0;
57}
58
59/* Mapping from cpu number to logical apicid */
60static inline int cpu_to_logical_apicid(int cpu)
61{
62 return 1 << cpu;
63}
64
65static inline int cpu_present_to_apicid(int mps_cpu)
66{
67 if (mps_cpu < get_physical_broadcast())
68 return mps_cpu;
69 else
70 return BAD_APICID;
71}
72
73static inline physid_mask_t apicid_to_cpu_present(int apicid)
74{
75 return physid_mask_of_physid(apicid);
76}
77
78#define WAKE_SECONDARY_VIA_INIT
79
80static inline void setup_portio_remap(void)
81{
82}
83
84static inline void enable_apic_mode(void)
85{
86}
87
88static inline int check_phys_apicid_present(int boot_cpu_physical_apicid)
89{
90 return physid_isset(boot_cpu_physical_apicid, phys_cpu_present_map);
91}
92
93static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
94{
95 return cpus_addr(cpumask)[0];
96}
97
98static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb)
99{
100 return cpuid_apic >> index_msb;
101}
102
103#endif /* __ASM_MACH_APIC_H */
diff --git a/include/asm-x86/mach-visws/mach_apicdef.h b/include/asm-x86/mach-visws/mach_apicdef.h
index 826cfa97d778..42711d152a93 100644
--- a/include/asm-x86/mach-visws/mach_apicdef.h
+++ b/include/asm-x86/mach-visws/mach_apicdef.h
@@ -1,12 +1 @@
-#ifndef __ASM_MACH_APICDEF_H
-#define __ASM_MACH_APICDEF_H
+#include "../mach-default/mach_apicdef.h"
3
4#define APIC_ID_MASK (0xF<<24)
5
6static inline unsigned get_apic_id(unsigned long x)
7{
8 return (((x)>>24)&0xF);
9}
10#define GET_APIC_ID(x) get_apic_id(x)
11
12#endif
diff --git a/include/asm-x86/mach-visws/setup_arch.h b/include/asm-x86/mach-visws/setup_arch.h
index 33f700ef6831..fa4766ca2d10 100644
--- a/include/asm-x86/mach-visws/setup_arch.h
+++ b/include/asm-x86/mach-visws/setup_arch.h
@@ -1,8 +1 @@
-/* Hook to call BIOS initialisation function */
+#include "../mach-default/setup_arch.h"
2
3extern unsigned long sgivwfb_mem_phys;
4extern unsigned long sgivwfb_mem_size;
5
6/* no action for visws */
7
8#define ARCH_SETUP
diff --git a/include/asm-x86/mach-visws/smpboot_hooks.h b/include/asm-x86/mach-visws/smpboot_hooks.h
index c9b83e395a2e..e4433ca88715 100644
--- a/include/asm-x86/mach-visws/smpboot_hooks.h
+++ b/include/asm-x86/mach-visws/smpboot_hooks.h
@@ -1,28 +1 @@
-static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip)
+#include "../mach-default/smpboot_hooks.h"
2{
3 CMOS_WRITE(0xa, 0xf);
4 local_flush_tlb();
5 Dprintk("1.\n");
6 *((volatile unsigned short *) TRAMPOLINE_HIGH) = start_eip >> 4;
7 Dprintk("2.\n");
8 *((volatile unsigned short *) TRAMPOLINE_LOW) = start_eip & 0xf;
9 Dprintk("3.\n");
10}
11
12/* for visws do nothing for any of these */
13
14static inline void smpboot_clear_io_apic_irqs(void)
15{
16}
17
18static inline void smpboot_restore_warm_reset_vector(void)
19{
20}
21
22static inline void smpboot_setup_io_apic(void)
23{
24}
25
26static inline void smpboot_clear_io_apic(void)
27{
28}
diff --git a/include/asm-x86/mach-voyager/irq_vectors.h b/include/asm-x86/mach-voyager/irq_vectors.h
deleted file mode 100644
index 165421f5821c..000000000000
--- a/include/asm-x86/mach-voyager/irq_vectors.h
+++ /dev/null
@@ -1,79 +0,0 @@
1/* -*- mode: c; c-basic-offset: 8 -*- */
2
3/* Copyright (C) 2002
4 *
5 * Author: James.Bottomley@HansenPartnership.com
6 *
7 * linux/arch/i386/voyager/irq_vectors.h
8 *
9 * This file provides definitions for the VIC and QIC CPIs
10 */
11
12#ifndef _ASM_IRQ_VECTORS_H
13#define _ASM_IRQ_VECTORS_H
14
15/*
16 * IDT vectors usable for external interrupt sources start
17 * at 0x20:
18 */
19#define FIRST_EXTERNAL_VECTOR 0x20
20
21#define SYSCALL_VECTOR 0x80
22
23/*
24 * Vectors 0x20-0x2f are used for ISA interrupts.
25 */
26
27/* These define the CPIs we use in linux */
28#define VIC_CPI_LEVEL0 0
29#define VIC_CPI_LEVEL1 1
30/* now the fake CPIs */
31#define VIC_TIMER_CPI 2
32#define VIC_INVALIDATE_CPI 3
33#define VIC_RESCHEDULE_CPI 4
34#define VIC_ENABLE_IRQ_CPI 5
35#define VIC_CALL_FUNCTION_CPI 6
36
37/* Now the QIC CPIs: Since we don't need the two initial levels,
38 * these are 2 less than the VIC CPIs */
39#define QIC_CPI_OFFSET 1
40#define QIC_TIMER_CPI (VIC_TIMER_CPI - QIC_CPI_OFFSET)
41#define QIC_INVALIDATE_CPI (VIC_INVALIDATE_CPI - QIC_CPI_OFFSET)
42#define QIC_RESCHEDULE_CPI (VIC_RESCHEDULE_CPI - QIC_CPI_OFFSET)
43#define QIC_ENABLE_IRQ_CPI (VIC_ENABLE_IRQ_CPI - QIC_CPI_OFFSET)
44#define QIC_CALL_FUNCTION_CPI (VIC_CALL_FUNCTION_CPI - QIC_CPI_OFFSET)
45
46#define VIC_START_FAKE_CPI VIC_TIMER_CPI
47#define VIC_END_FAKE_CPI VIC_CALL_FUNCTION_CPI
48
49/* this is the SYS_INT CPI. */
50#define VIC_SYS_INT 8
51#define VIC_CMN_INT 15
52
53/* This is the boot CPI for alternate processors. It gets overwritten
54 * by the above once the system has activated all available processors */
55#define VIC_CPU_BOOT_CPI VIC_CPI_LEVEL0
56#define VIC_CPU_BOOT_ERRATA_CPI (VIC_CPI_LEVEL0 + 8)
57
58#define NR_VECTORS 256
59#define NR_IRQS 224
60#define NR_IRQ_VECTORS NR_IRQS
61
62#define FPU_IRQ 13
63
64#define FIRST_VM86_IRQ 3
65#define LAST_VM86_IRQ 15
66#define invalid_vm86_irq(irq) ((irq) < 3 || (irq) > 15)
67
68#ifndef __ASSEMBLY__
69extern asmlinkage void vic_cpi_interrupt(void);
70extern asmlinkage void vic_sys_interrupt(void);
71extern asmlinkage void vic_cmn_interrupt(void);
72extern asmlinkage void qic_timer_interrupt(void);
73extern asmlinkage void qic_invalidate_interrupt(void);
74extern asmlinkage void qic_reschedule_interrupt(void);
75extern asmlinkage void qic_enable_irq_interrupt(void);
76extern asmlinkage void qic_call_function_interrupt(void);
77#endif /* !__ASSEMBLY__ */
78
79#endif /* _ASM_IRQ_VECTORS_H */
diff --git a/include/asm-x86/mmconfig.h b/include/asm-x86/mmconfig.h
new file mode 100644
index 000000000000..95beda07c6fa
--- /dev/null
+++ b/include/asm-x86/mmconfig.h
@@ -0,0 +1,12 @@
1#ifndef _ASM_MMCONFIG_H
2#define _ASM_MMCONFIG_H
3
4#ifdef CONFIG_PCI_MMCONFIG
5extern void __cpuinit fam10h_check_enable_mmcfg(void);
6extern void __init check_enable_amd_mmconf_dmi(void);
7#else
8static inline void fam10h_check_enable_mmcfg(void) { }
9static inline void check_enable_amd_mmconf_dmi(void) { }
10#endif
11
12#endif
diff --git a/include/asm-x86/mmu_context.h b/include/asm-x86/mmu_context.h
index 6598450da6c6..fac57014e7c6 100644
--- a/include/asm-x86/mmu_context.h
+++ b/include/asm-x86/mmu_context.h
@@ -1,5 +1,37 @@
1#ifndef __ASM_X86_MMU_CONTEXT_H
2#define __ASM_X86_MMU_CONTEXT_H
3
4#include <asm/desc.h>
5#include <asm/atomic.h>
6#include <asm/pgalloc.h>
7#include <asm/tlbflush.h>
8#include <asm/paravirt.h>
9#ifndef CONFIG_PARAVIRT
10#include <asm-generic/mm_hooks.h>
11
12static inline void paravirt_activate_mm(struct mm_struct *prev,
13 struct mm_struct *next)
14{
15}
16#endif /* !CONFIG_PARAVIRT */
17
18/*
19 * Used for LDT copy/destruction.
20 */
21int init_new_context(struct task_struct *tsk, struct mm_struct *mm);
22void destroy_context(struct mm_struct *mm);
23
 #ifdef CONFIG_X86_32
 # include "mmu_context_32.h"
 #else
 # include "mmu_context_64.h"
 #endif
29
30#define activate_mm(prev, next) \
31do { \
32 paravirt_activate_mm((prev), (next)); \
33 switch_mm((prev), (next), NULL); \
34} while (0);
35
36
37#endif /* __ASM_X86_MMU_CONTEXT_H */
diff --git a/include/asm-x86/mmu_context_32.h b/include/asm-x86/mmu_context_32.h
index 9756ae0f1dd3..824fc575c6d8 100644
--- a/include/asm-x86/mmu_context_32.h
+++ b/include/asm-x86/mmu_context_32.h
@@ -1,28 +1,6 @@
 #ifndef __I386_SCHED_H
 #define __I386_SCHED_H
 
4#include <asm/desc.h>
5#include <asm/atomic.h>
6#include <asm/pgalloc.h>
7#include <asm/tlbflush.h>
8#include <asm/paravirt.h>
9#ifndef CONFIG_PARAVIRT
10#include <asm-generic/mm_hooks.h>
11
12static inline void paravirt_activate_mm(struct mm_struct *prev,
13 struct mm_struct *next)
14{
15}
16#endif /* !CONFIG_PARAVIRT */
17
18
19/*
20 * Used for LDT copy/destruction.
21 */
22int init_new_context(struct task_struct *tsk, struct mm_struct *mm);
23void destroy_context(struct mm_struct *mm);
24
25
 static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
 {
 #ifdef CONFIG_SMP
@@ -75,10 +53,4 @@ static inline void switch_mm(struct mm_struct *prev,
 #define deactivate_mm(tsk, mm)			\
 	asm("movl %0,%%gs": :"r" (0));
 
78#define activate_mm(prev, next) \
79do { \
80 paravirt_activate_mm((prev), (next)); \
81 switch_mm((prev), (next), NULL); \
82} while (0);
83
 #endif
diff --git a/include/asm-x86/mmu_context_64.h b/include/asm-x86/mmu_context_64.h
index ca44c71e7fb3..c7000634ccae 100644
--- a/include/asm-x86/mmu_context_64.h
+++ b/include/asm-x86/mmu_context_64.h
@@ -1,21 +1,7 @@
 #ifndef __X86_64_MMU_CONTEXT_H
 #define __X86_64_MMU_CONTEXT_H
 
-#include <asm/desc.h>
-#include <asm/atomic.h>
-#include <asm/pgalloc.h>
 #include <asm/pda.h>
8#include <asm/pgtable.h>
9#include <asm/tlbflush.h>
10#ifndef CONFIG_PARAVIRT
11#include <asm-generic/mm_hooks.h>
12#endif
13
14/*
15 * possibly do the LDT unload here?
16 */
17int init_new_context(struct task_struct *tsk, struct mm_struct *mm);
18void destroy_context(struct mm_struct *mm);
 
 static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
 {
@@ -65,8 +51,4 @@ do { \
 	asm volatile("movl %0,%%fs"::"r"(0));	\
 } while (0)
 
68#define activate_mm(prev, next) \
69 switch_mm((prev), (next), NULL)
70
71
 #endif
diff --git a/include/asm-x86/mmzone_32.h b/include/asm-x86/mmzone_32.h
index cb2cad0b65a7..b2298a227567 100644
--- a/include/asm-x86/mmzone_32.h
+++ b/include/asm-x86/mmzone_32.h
@@ -12,11 +12,9 @@
 extern struct pglist_data *node_data[];
 #define NODE_DATA(nid)	(node_data[nid])
 
-#ifdef CONFIG_X86_NUMAQ
-	#include <asm/numaq.h>
-#elif defined(CONFIG_ACPI_SRAT)/* summit or generic arch */
-	#include <asm/srat.h>
-#endif
+#include <asm/numaq.h>
+/* summit or generic arch */
+#include <asm/srat.h>
 
 extern int get_memcfg_numa_flat(void);
 /*
@@ -26,28 +24,20 @@ extern int get_memcfg_numa_flat(void);
  */
 static inline void get_memcfg_numa(void)
 {
-#ifdef CONFIG_X86_NUMAQ
+
 	if (get_memcfg_numaq())
 		return;
-#elif defined(CONFIG_ACPI_SRAT)
 	if (get_memcfg_from_srat())
 		return;
-#endif
-
 	get_memcfg_numa_flat();
 }
 
 extern int early_pfn_to_nid(unsigned long pfn);
-extern void numa_kva_reserve(void);
 
 #else /* !CONFIG_NUMA */
 
 #define get_memcfg_numa get_memcfg_numa_flat
-#define get_zholes_size(n) (0)
 
-static inline void numa_kva_reserve(void)
-{
-}
 #endif /* CONFIG_NUMA */
 
 #ifdef CONFIG_DISCONTIGMEM
@@ -55,14 +45,14 @@ static inline void numa_kva_reserve(void)
 /*
  * generic node memory support, the following assumptions apply:
  *
- * 1) memory comes in 256Mb contiguous chunks which are either present or not
+ * 1) memory comes in 64Mb contiguous chunks which are either present or not
  * 2) we will not have more than 64Gb in total
  *
  * for now assume that 64Gb is max amount of RAM for whole system
  *    64Gb / 4096bytes/page = 16777216 pages
  */
 #define MAX_NR_PAGES 16777216
-#define MAX_ELEMENTS 256
+#define MAX_ELEMENTS 1024
 #define PAGES_PER_ELEMENT (MAX_NR_PAGES/MAX_ELEMENTS)
 
 extern s8 physnode_map[];
@@ -87,9 +77,6 @@ static inline int pfn_to_nid(unsigned long pfn)
 		__pgdat->node_start_pfn + __pgdat->node_spanned_pages;	\
 })
 
-#ifdef CONFIG_X86_NUMAQ /* we have contiguous memory on NUMA-Q */
-#define pfn_valid(pfn)		((pfn) < num_physpages)
-#else
 static inline int pfn_valid(int pfn)
 {
 	int nid = pfn_to_nid(pfn);
@@ -98,7 +85,6 @@ static inline int pfn_valid(int pfn)
 		return (pfn < node_end_pfn(nid));
 	return 0;
 }
-#endif /* CONFIG_X86_NUMAQ */
 
 #endif /* CONFIG_DISCONTIGMEM */
 
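
Worked out, the MAX_ELEMENTS change means 16777216 pages / 1024 elements = 16384 pages, i.e. 64MB of address space per physnode_map slot, which is why the comment's chunk size drops from 256Mb to 64Mb. A sketch of the lookup this table serves, mirroring pfn_to_nid() above:

	static inline int node_of_pfn_sketch(unsigned long pfn)
	{
		/* one s8 entry answers "which node owns this 64MB chunk?" */
		return (int)physnode_map[pfn / PAGES_PER_ELEMENT];
	}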
diff --git a/include/asm-x86/mpspec.h b/include/asm-x86/mpspec.h
index 57a991b9c053..b6995e567fcc 100644
--- a/include/asm-x86/mpspec.h
+++ b/include/asm-x86/mpspec.h
@@ -13,6 +13,12 @@ extern int apic_version[MAX_APICS];
 extern u8 apicid_2_node[];
 extern int pic_mode;
 
16#ifdef CONFIG_X86_NUMAQ
17extern int mp_bus_id_to_node[MAX_MP_BUSSES];
18extern int mp_bus_id_to_local[MAX_MP_BUSSES];
19extern int quad_local_to_mp_bus_id [NR_CPUS/4][4];
20#endif
21
 #define MAX_APICID 256
 
 #else
@@ -21,26 +27,30 @@ extern int pic_mode;
21/* Each PCI slot may be a combo card with its own bus. 4 IRQ pins per slot. */ 27/* Each PCI slot may be a combo card with its own bus. 4 IRQ pins per slot. */
22#define MAX_IRQ_SOURCES (MAX_MP_BUSSES * 4) 28#define MAX_IRQ_SOURCES (MAX_MP_BUSSES * 4)
23 29
30#endif
31
24extern void early_find_smp_config(void); 32extern void early_find_smp_config(void);
25extern void early_get_smp_config(void); 33extern void early_get_smp_config(void);
26 34
27#endif
28
29#if defined(CONFIG_MCA) || defined(CONFIG_EISA) 35#if defined(CONFIG_MCA) || defined(CONFIG_EISA)
30extern int mp_bus_id_to_type[MAX_MP_BUSSES]; 36extern int mp_bus_id_to_type[MAX_MP_BUSSES];
31#endif 37#endif
32 38
33extern DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); 39extern DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
34 40
35extern int mp_bus_id_to_pci_bus[MAX_MP_BUSSES];
36
37extern unsigned int boot_cpu_physical_apicid; 41extern unsigned int boot_cpu_physical_apicid;
42extern unsigned int max_physical_apicid;
38extern int smp_found_config; 43extern int smp_found_config;
39extern int mpc_default_type; 44extern int mpc_default_type;
40extern unsigned long mp_lapic_addr; 45extern unsigned long mp_lapic_addr;
41 46
42extern void find_smp_config(void); 47extern void find_smp_config(void);
43extern void get_smp_config(void); 48extern void get_smp_config(void);
49#ifdef CONFIG_X86_MPPARSE
50extern void early_reserve_e820_mpc_new(void);
51#else
52static inline void early_reserve_e820_mpc_new(void) { }
53#endif
44 54
45void __cpuinit generic_processor_info(int apicid, int version); 55void __cpuinit generic_processor_info(int apicid, int version);
46#ifdef CONFIG_ACPI 56#ifdef CONFIG_ACPI
@@ -49,6 +59,17 @@ extern void mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger,
49 u32 gsi); 59 u32 gsi);
50extern void mp_config_acpi_legacy_irqs(void); 60extern void mp_config_acpi_legacy_irqs(void);
51extern int mp_register_gsi(u32 gsi, int edge_level, int active_high_low); 61extern int mp_register_gsi(u32 gsi, int edge_level, int active_high_low);
62#ifdef CONFIG_X86_IO_APIC
63extern int mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin,
64 u32 gsi, int triggering, int polarity);
65#else
66static inline int
67mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin,
68 u32 gsi, int triggering, int polarity)
69{
70 return 0;
71}
72#endif
52#endif /* CONFIG_ACPI */ 73#endif /* CONFIG_ACPI */
53 74
54#define PHYSID_ARRAY_SIZE BITS_TO_LONGS(MAX_APICS) 75#define PHYSID_ARRAY_SIZE BITS_TO_LONGS(MAX_APICS)
@@ -101,6 +122,7 @@ typedef struct physid_mask physid_mask_t;
101 __physid_mask; \ 122 __physid_mask; \
102 }) 123 })
103 124
125/* Note: will create very large stack frames if physid_mask_t is big */
104#define physid_mask_of_physid(physid) \ 126#define physid_mask_of_physid(physid) \
105 ({ \ 127 ({ \
106 physid_mask_t __physid_mask = PHYSID_MASK_NONE; \ 128 physid_mask_t __physid_mask = PHYSID_MASK_NONE; \
@@ -108,6 +130,12 @@ typedef struct physid_mask physid_mask_t;
108 __physid_mask; \ 130 __physid_mask; \
109 }) 131 })
110 132
133static inline void physid_set_mask_of_physid(int physid, physid_mask_t *map)
134{
135 physids_clear(*map);
136 physid_set(physid, *map);
137}
138
111#define PHYSID_MASK_ALL { {[0 ... PHYSID_ARRAY_SIZE-1] = ~0UL} } 139#define PHYSID_MASK_ALL { {[0 ... PHYSID_ARRAY_SIZE-1] = ~0UL} }
112#define PHYSID_MASK_NONE { {[0 ... PHYSID_ARRAY_SIZE-1] = 0UL} } 140#define PHYSID_MASK_NONE { {[0 ... PHYSID_ARRAY_SIZE-1] = 0UL} }
113 141
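A minimal before/after of why the inline helper was added (hypothetical caller; 'mask' stands for whatever physid_mask_t the caller owns):

    /* Macro form: materializes a whole physid_mask_t on the stack and
     * copies it out -- cheap at 255 APICs, 4 KB per temporary once
     * MAX_APICS grows to 32768 (see the mpspec_def.h hunk below). */
    mask = physid_mask_of_physid(apicid);

    /* Helper form: clears and sets bits directly in the caller's mask,
     * so no oversized temporary is created. */
    physid_set_mask_of_physid(apicid, &mask);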
diff --git a/include/asm-x86/mpspec_def.h b/include/asm-x86/mpspec_def.h
index dc6ef85e3624..38d1e73b49e4 100644
--- a/include/asm-x86/mpspec_def.h
+++ b/include/asm-x86/mpspec_def.h
@@ -17,10 +17,11 @@
17# define MAX_MPC_ENTRY 1024 17# define MAX_MPC_ENTRY 1024
18# define MAX_APICS 256 18# define MAX_APICS 256
19#else 19#else
20/* 20# if NR_CPUS <= 255
21 * A maximum of 255 APICs with the current APIC ID architecture. 21# define MAX_APICS 255
22 */ 22# else
23# define MAX_APICS 255 23# define MAX_APICS 32768
24# endif
24#endif 25#endif
25 26
26struct intel_mp_floating { 27struct intel_mp_floating {
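The cost of the larger bound shows up in every physid bitmap, since PHYSID_ARRAY_SIZE above is BITS_TO_LONGS(MAX_APICS). On a 64-bit build, roughly:

    /* MAX_APICS == 255   -> BITS_TO_LONGS(255)   ==   4 longs ->   32 bytes
     * MAX_APICS == 32768 -> BITS_TO_LONGS(32768) == 512 longs -> 4096 bytes
     * which is what the "very large stack frames" warning on
     * physid_mask_of_physid() is about. */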
diff --git a/include/asm-x86/msr-index.h b/include/asm-x86/msr-index.h
index 09413ad39d3c..44bce773012e 100644
--- a/include/asm-x86/msr-index.h
+++ b/include/asm-x86/msr-index.h
@@ -111,7 +111,9 @@
111#define MSR_K8_TOP_MEM2 0xc001001d 111#define MSR_K8_TOP_MEM2 0xc001001d
112#define MSR_K8_SYSCFG 0xc0010010 112#define MSR_K8_SYSCFG 0xc0010010
113#define MSR_K8_HWCR 0xc0010015 113#define MSR_K8_HWCR 0xc0010015
114#define MSR_K8_ENABLE_C1E 0xc0010055 114#define MSR_K8_INT_PENDING_MSG 0xc0010055
115/* C1E active bits in int pending message */
116#define K8_INTP_C1E_ACTIVE_MASK 0x18000000
115#define MSR_K8_TSEG_ADDR 0xc0010112 117#define MSR_K8_TSEG_ADDR 0xc0010112
116#define K8_MTRRFIXRANGE_DRAM_ENABLE 0x00040000 /* MtrrFixDramEn bit */ 118#define K8_MTRRFIXRANGE_DRAM_ENABLE 0x00040000 /* MtrrFixDramEn bit */
117#define K8_MTRRFIXRANGE_DRAM_MODIFY 0x00080000 /* MtrrFixDramModEn bit */ 119#define K8_MTRRFIXRANGE_DRAM_MODIFY 0x00080000 /* MtrrFixDramModEn bit */
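The rename reflects what the register really is: AMD's interrupt-pending-message MSR, two bits of which report that C1E is active. A sketch of the detection this enables (assuming the kernel's usual rdmsr() helper; not a literal caller from this series):

    u32 lo, hi;

    rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi);
    if (lo & K8_INTP_C1E_ACTIVE_MASK) {
            /* C1E is active: the local APIC timer may stop in idle,
             * so a broadcast clockevent has to take over. */
    }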
diff --git a/include/asm-x86/msr.h b/include/asm-x86/msr.h
index 2b5f2c91db25..ca110ee73f07 100644
--- a/include/asm-x86/msr.h
+++ b/include/asm-x86/msr.h
@@ -66,7 +66,7 @@ static inline unsigned long long native_read_msr_safe(unsigned int msr,
66static inline void native_write_msr(unsigned int msr, 66static inline void native_write_msr(unsigned int msr,
67 unsigned low, unsigned high) 67 unsigned low, unsigned high)
68{ 68{
69 asm volatile("wrmsr" : : "c" (msr), "a"(low), "d" (high)); 69 asm volatile("wrmsr" : : "c" (msr), "a"(low), "d" (high) : "memory");
70} 70}
71 71
72static inline int native_write_msr_safe(unsigned int msr, 72static inline int native_write_msr_safe(unsigned int msr,
@@ -81,7 +81,8 @@ static inline int native_write_msr_safe(unsigned int msr,
81 _ASM_EXTABLE(2b, 3b) 81 _ASM_EXTABLE(2b, 3b)
82 : "=a" (err) 82 : "=a" (err)
83 : "c" (msr), "0" (low), "d" (high), 83 : "c" (msr), "0" (low), "d" (high),
84 "i" (-EFAULT)); 84 "i" (-EFAULT)
85 : "memory");
85 return err; 86 return err;
86} 87}
87 88
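The new "memory" clobbers make both MSR writes compiler barriers: without them the compiler may reorder ordinary loads and stores across the wrmsr, which is wrong whenever the MSR's side effect depends on prior memory state. Schematically (hypothetical variable and MSR name, for illustration only):

    some_flag = 1;                              /* must be visible first...  */
    native_write_msr(MSR_HYPOTHETICAL, lo, hi); /* ...before this MSR write  */
    /* Without the clobber the compiler was free to sink the store below
     * the asm; with it, the order above is preserved. */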
diff --git a/include/asm-x86/nmi.h b/include/asm-x86/nmi.h
index 1e363021e72f..21f8d0202a82 100644
--- a/include/asm-x86/nmi.h
+++ b/include/asm-x86/nmi.h
@@ -15,38 +15,13 @@
15 */ 15 */
16int do_nmi_callback(struct pt_regs *regs, int cpu); 16int do_nmi_callback(struct pt_regs *regs, int cpu);
17 17
18#ifdef CONFIG_PM
19
20/** Replace the PM callback routine for NMI. */
21struct pm_dev *set_nmi_pm_callback(pm_callback callback);
22
23/** Unset the PM callback routine back to the default. */
24void unset_nmi_pm_callback(struct pm_dev *dev);
25
26#else
27
28static inline struct pm_dev *set_nmi_pm_callback(pm_callback callback)
29{
30 return 0;
31}
32
33static inline void unset_nmi_pm_callback(struct pm_dev *dev)
34{
35}
36
37#endif /* CONFIG_PM */
38
39#ifdef CONFIG_X86_64 18#ifdef CONFIG_X86_64
40extern void default_do_nmi(struct pt_regs *); 19extern void default_do_nmi(struct pt_regs *);
41extern void die_nmi(char *str, struct pt_regs *regs, int do_panic);
42extern void nmi_watchdog_default(void);
43#else
44#define nmi_watchdog_default() do {} while (0)
45#endif 20#endif
46 21
22extern void die_nmi(char *str, struct pt_regs *regs, int do_panic);
47extern int check_nmi_watchdog(void); 23extern int check_nmi_watchdog(void);
48extern int nmi_watchdog_enabled; 24extern int nmi_watchdog_enabled;
49extern int unknown_nmi_panic;
50extern int avail_to_resrv_perfctr_nmi_bit(unsigned int); 25extern int avail_to_resrv_perfctr_nmi_bit(unsigned int);
51extern int avail_to_resrv_perfctr_nmi(unsigned int); 26extern int avail_to_resrv_perfctr_nmi(unsigned int);
52extern int reserve_perfctr_nmi(unsigned int); 27extern int reserve_perfctr_nmi(unsigned int);
@@ -62,12 +37,10 @@ extern int nmi_watchdog_tick(struct pt_regs *regs, unsigned reason);
62 37
63extern atomic_t nmi_active; 38extern atomic_t nmi_active;
64extern unsigned int nmi_watchdog; 39extern unsigned int nmi_watchdog;
65#define NMI_DISABLED -1
66#define NMI_NONE 0 40#define NMI_NONE 0
67#define NMI_IO_APIC 1 41#define NMI_IO_APIC 1
68#define NMI_LOCAL_APIC 2 42#define NMI_LOCAL_APIC 2
69#define NMI_INVALID 3 43#define NMI_INVALID 3
70#define NMI_DEFAULT NMI_DISABLED
71 44
72struct ctl_table; 45struct ctl_table;
73struct file; 46struct file;
@@ -78,6 +51,24 @@ extern int unknown_nmi_panic;
78void __trigger_all_cpu_backtrace(void); 51void __trigger_all_cpu_backtrace(void);
79#define trigger_all_cpu_backtrace() __trigger_all_cpu_backtrace() 52#define trigger_all_cpu_backtrace() __trigger_all_cpu_backtrace()
80 53
54static inline void localise_nmi_watchdog(void)
55{
56 if (nmi_watchdog == NMI_IO_APIC)
57 nmi_watchdog = NMI_LOCAL_APIC;
58}
59
60/* check if nmi_watchdog is active (ie was specified at boot) */
61static inline int nmi_watchdog_active(void)
62{
63 /*
64 * actually it should be:
65 * return (nmi_watchdog == NMI_LOCAL_APIC ||
66 * nmi_watchdog == NMI_IO_APIC)
67 * but since they are powers of two we could use a
68 * cheaper way --cvg
69 */
70 return nmi_watchdog & 0x3;
71}
81#endif 72#endif
82 73
83void lapic_watchdog_stop(void); 74void lapic_watchdog_stop(void);
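Spelling the bit trick out with the values defined above:

    /* nmi_watchdog          value    value & 0x3
     * NMI_NONE                0          0      -> inactive
     * NMI_IO_APIC             1          1      -> active
     * NMI_LOCAL_APIC          2          2      -> active
     * NMI_INVALID             3          3      -> also nonzero!
     * So the helper implicitly assumes nmi_watchdog no longer holds
     * NMI_INVALID by the time it is called. */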
diff --git a/include/asm-x86/numa_32.h b/include/asm-x86/numa_32.h
index 03d0f7a9bf02..220d7b7707a0 100644
--- a/include/asm-x86/numa_32.h
+++ b/include/asm-x86/numa_32.h
@@ -2,14 +2,10 @@
2#define _ASM_X86_32_NUMA_H 1 2#define _ASM_X86_32_NUMA_H 1
3 3
4extern int pxm_to_nid(int pxm); 4extern int pxm_to_nid(int pxm);
5extern void numa_remove_cpu(int cpu);
5 6
6#ifdef CONFIG_NUMA 7#ifdef CONFIG_NUMA
7extern void __init remap_numa_kva(void); 8extern void set_highmem_pages_init(void);
8extern void set_highmem_pages_init(int);
9#else
10static inline void remap_numa_kva(void)
11{
12}
13#endif 9#endif
14 10
15#endif /* _ASM_X86_32_NUMA_H */ 11#endif /* _ASM_X86_32_NUMA_H */
diff --git a/include/asm-x86/numa_64.h b/include/asm-x86/numa_64.h
index 22e87c9f6a80..3830094434a9 100644
--- a/include/asm-x86/numa_64.h
+++ b/include/asm-x86/numa_64.h
@@ -14,32 +14,30 @@ extern int compute_hash_shift(struct bootnode *nodes, int numblks,
14 14
15#define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT)) 15#define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT))
16 16
17extern void numa_add_cpu(int cpu);
18extern void numa_init_array(void); 17extern void numa_init_array(void);
19extern int numa_off; 18extern int numa_off;
20 19
21extern void numa_set_node(int cpu, int node);
22extern void srat_reserve_add_area(int nodeid); 20extern void srat_reserve_add_area(int nodeid);
23extern int hotadd_percent; 21extern int hotadd_percent;
24 22
25extern s16 apicid_to_node[MAX_LOCAL_APIC]; 23extern s16 apicid_to_node[MAX_LOCAL_APIC];
26 24
27extern void numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn);
28extern unsigned long numa_free_all_bootmem(void); 25extern unsigned long numa_free_all_bootmem(void);
29extern void setup_node_bootmem(int nodeid, unsigned long start, 26extern void setup_node_bootmem(int nodeid, unsigned long start,
30 unsigned long end); 27 unsigned long end);
31 28
32#ifdef CONFIG_NUMA 29#ifdef CONFIG_NUMA
33extern void __init init_cpu_to_node(void); 30extern void __init init_cpu_to_node(void);
34 31extern void __cpuinit numa_set_node(int cpu, int node);
35static inline void clear_node_cpumask(int cpu) 32extern void __cpuinit numa_clear_node(int cpu);
36{ 33extern void __cpuinit numa_add_cpu(int cpu);
37 clear_bit(cpu, (unsigned long *)&node_to_cpumask_map[cpu_to_node(cpu)]); 34extern void __cpuinit numa_remove_cpu(int cpu);
38}
39
40#else 35#else
41#define init_cpu_to_node() do {} while (0) 36static inline void init_cpu_to_node(void) { }
42#define clear_node_cpumask(cpu) do {} while (0) 37static inline void numa_set_node(int cpu, int node) { }
38static inline void numa_clear_node(int cpu) { }
39static inline void numa_add_cpu(int cpu) { }
40static inline void numa_remove_cpu(int cpu) { }
43#endif 41#endif
44 42
45#endif 43#endif
diff --git a/include/asm-x86/numaq.h b/include/asm-x86/numaq.h
index 94b86c31239a..ef068d2465d6 100644
--- a/include/asm-x86/numaq.h
+++ b/include/asm-x86/numaq.h
@@ -28,6 +28,7 @@
28 28
29#ifdef CONFIG_X86_NUMAQ 29#ifdef CONFIG_X86_NUMAQ
30 30
31extern int found_numaq;
31extern int get_memcfg_numaq(void); 32extern int get_memcfg_numaq(void);
32 33
33/* 34/*
@@ -156,9 +157,10 @@ struct sys_cfg_data {
156 struct eachquadmem eq[MAX_NUMNODES]; /* indexed by quad id */ 157 struct eachquadmem eq[MAX_NUMNODES]; /* indexed by quad id */
157}; 158};
158 159
159static inline unsigned long *get_zholes_size(int nid) 160#else
161static inline int get_memcfg_numaq(void)
160{ 162{
161 return NULL; 163 return 0;
162} 164}
163#endif /* CONFIG_X86_NUMAQ */ 165#endif /* CONFIG_X86_NUMAQ */
164#endif /* NUMAQ_H */ 166#endif /* NUMAQ_H */
diff --git a/include/asm-x86/page.h b/include/asm-x86/page.h
index dc936dddf161..28d7b4533b1a 100644
--- a/include/asm-x86/page.h
+++ b/include/asm-x86/page.h
@@ -51,9 +51,17 @@
51 51
52#ifndef __ASSEMBLY__ 52#ifndef __ASSEMBLY__
53 53
54typedef struct { pgdval_t pgd; } pgd_t;
55typedef struct { pgprotval_t pgprot; } pgprot_t;
56
54extern int page_is_ram(unsigned long pagenr); 57extern int page_is_ram(unsigned long pagenr);
55extern int devmem_is_allowed(unsigned long pagenr); 58extern int devmem_is_allowed(unsigned long pagenr);
59extern void map_devmem(unsigned long pfn, unsigned long size,
60 pgprot_t vma_prot);
61extern void unmap_devmem(unsigned long pfn, unsigned long size,
62 pgprot_t vma_prot);
56 63
64extern unsigned long max_low_pfn_mapped;
57extern unsigned long max_pfn_mapped; 65extern unsigned long max_pfn_mapped;
58 66
59struct page; 67struct page;
@@ -74,9 +82,6 @@ static inline void copy_user_page(void *to, void *from, unsigned long vaddr,
74 alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr) 82 alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr)
75#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE 83#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
76 84
77typedef struct { pgdval_t pgd; } pgd_t;
78typedef struct { pgprotval_t pgprot; } pgprot_t;
79
80static inline pgd_t native_make_pgd(pgdval_t val) 85static inline pgd_t native_make_pgd(pgdval_t val)
81{ 86{
82 return (pgd_t) { val }; 87 return (pgd_t) { val };
@@ -160,6 +165,7 @@ static inline pteval_t native_pte_val(pte_t pte)
160#endif 165#endif
161 166
162#define pte_val(x) native_pte_val(x) 167#define pte_val(x) native_pte_val(x)
168#define pte_flags(x) native_pte_val(x)
163#define __pte(x) native_make_pte(x) 169#define __pte(x) native_make_pte(x)
164 170
165#endif /* CONFIG_PARAVIRT */ 171#endif /* CONFIG_PARAVIRT */
diff --git a/include/asm-x86/page_32.h b/include/asm-x86/page_32.h
index ccf0ba3c3aba..ab8528793f08 100644
--- a/include/asm-x86/page_32.h
+++ b/include/asm-x86/page_32.h
@@ -13,6 +13,14 @@
13 */ 13 */
14#define __PAGE_OFFSET _AC(CONFIG_PAGE_OFFSET, UL) 14#define __PAGE_OFFSET _AC(CONFIG_PAGE_OFFSET, UL)
15 15
16#ifdef CONFIG_4KSTACKS
17#define THREAD_ORDER 0
18#else
19#define THREAD_ORDER 1
20#endif
21#define THREAD_SIZE (PAGE_SIZE << THREAD_ORDER)
22
23
16#ifdef CONFIG_X86_PAE 24#ifdef CONFIG_X86_PAE
17/* 44=32+12, the limit we can fit into an unsigned long pfn */ 25/* 44=32+12, the limit we can fit into an unsigned long pfn */
18#define __PHYSICAL_MASK_SHIFT 44 26#define __PHYSICAL_MASK_SHIFT 44
@@ -84,6 +92,13 @@ extern int sysctl_legacy_va_layout;
84#define VMALLOC_RESERVE ((unsigned long)__VMALLOC_RESERVE) 92#define VMALLOC_RESERVE ((unsigned long)__VMALLOC_RESERVE)
85#define MAXMEM (-__PAGE_OFFSET - __VMALLOC_RESERVE) 93#define MAXMEM (-__PAGE_OFFSET - __VMALLOC_RESERVE)
86 94
95extern void find_low_pfn_range(void);
96extern unsigned long init_memory_mapping(unsigned long start,
97 unsigned long end);
98extern void initmem_init(unsigned long, unsigned long);
99extern void setup_bootmem_allocator(void);
100
101
87#ifdef CONFIG_X86_USE_3DNOW 102#ifdef CONFIG_X86_USE_3DNOW
88#include <asm/mmx.h> 103#include <asm/mmx.h>
89 104
diff --git a/include/asm-x86/page_64.h b/include/asm-x86/page_64.h
index 6ea72859c491..c6916c83e6b1 100644
--- a/include/asm-x86/page_64.h
+++ b/include/asm-x86/page_64.h
@@ -26,7 +26,13 @@
26#define PUD_PAGE_SIZE (_AC(1, UL) << PUD_SHIFT) 26#define PUD_PAGE_SIZE (_AC(1, UL) << PUD_SHIFT)
27#define PUD_PAGE_MASK (~(PUD_PAGE_SIZE-1)) 27#define PUD_PAGE_MASK (~(PUD_PAGE_SIZE-1))
28 28
29#define __PAGE_OFFSET _AC(0xffff810000000000, UL) 29/*
30 * Set __PAGE_OFFSET to the most negative possible address +
31 * PGDIR_SIZE*16 (pgd slot 272). The gap is to allow a space for a
32 * hypervisor to fit. Choosing 16 slots here is arbitrary, but it's
33 * what Xen requires.
34 */
35#define __PAGE_OFFSET _AC(0xffff880000000000, UL)
30 36
31#define __PHYSICAL_START CONFIG_PHYSICAL_START 37#define __PHYSICAL_START CONFIG_PHYSICAL_START
32#define __KERNEL_ALIGN 0x200000 38#define __KERNEL_ALIGN 0x200000
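The comment's arithmetic, written out for 4-level paging (PGDIR_SHIFT == 39, so PGDIR_SIZE == 2^39 == 512 GB, and pgd slot 256 is the first kernel slot):

      most negative canonical address:  0xffff800000000000   (pgd slot 256)
    + 16 * PGDIR_SIZE = 16 * 2^39:      0x0000080000000000
    = __PAGE_OFFSET:                    0xffff880000000000   (pgd slot 272)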
@@ -58,7 +64,8 @@
58void clear_page(void *page); 64void clear_page(void *page);
59void copy_page(void *to, void *from); 65void copy_page(void *to, void *from);
60 66
61extern unsigned long end_pfn; 67/* duplicated to the one in bootmem.h */
68extern unsigned long max_pfn;
62extern unsigned long phys_base; 69extern unsigned long phys_base;
63 70
64extern unsigned long __phys_addr(unsigned long); 71extern unsigned long __phys_addr(unsigned long);
@@ -83,10 +90,15 @@ typedef struct { pteval_t pte; } pte_t;
83extern unsigned long init_memory_mapping(unsigned long start, 90extern unsigned long init_memory_mapping(unsigned long start,
84 unsigned long end); 91 unsigned long end);
85 92
93extern void initmem_init(unsigned long start_pfn, unsigned long end_pfn);
94
95extern void init_extra_mapping_uc(unsigned long phys, unsigned long size);
96extern void init_extra_mapping_wb(unsigned long phys, unsigned long size);
97
86#endif /* !__ASSEMBLY__ */ 98#endif /* !__ASSEMBLY__ */
87 99
88#ifdef CONFIG_FLATMEM 100#ifdef CONFIG_FLATMEM
89#define pfn_valid(pfn) ((pfn) < end_pfn) 101#define pfn_valid(pfn) ((pfn) < max_pfn)
90#endif 102#endif
91 103
92 104
diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h
index 0f13b945e240..ef5e8ec6a6ab 100644
--- a/include/asm-x86/paravirt.h
+++ b/include/asm-x86/paravirt.h
@@ -84,7 +84,7 @@ struct pv_time_ops {
84 int (*set_wallclock)(unsigned long); 84 int (*set_wallclock)(unsigned long);
85 85
86 unsigned long long (*sched_clock)(void); 86 unsigned long long (*sched_clock)(void);
87 unsigned long (*get_cpu_khz)(void); 87 unsigned long (*get_tsc_khz)(void);
88}; 88};
89 89
90struct pv_cpu_ops { 90struct pv_cpu_ops {
@@ -115,6 +115,9 @@ struct pv_cpu_ops {
115 void (*set_ldt)(const void *desc, unsigned entries); 115 void (*set_ldt)(const void *desc, unsigned entries);
116 unsigned long (*store_tr)(void); 116 unsigned long (*store_tr)(void);
117 void (*load_tls)(struct thread_struct *t, unsigned int cpu); 117 void (*load_tls)(struct thread_struct *t, unsigned int cpu);
118#ifdef CONFIG_X86_64
119 void (*load_gs_index)(unsigned int idx);
120#endif
118 void (*write_ldt_entry)(struct desc_struct *ldt, int entrynum, 121 void (*write_ldt_entry)(struct desc_struct *ldt, int entrynum,
119 const void *desc); 122 const void *desc);
120 void (*write_gdt_entry)(struct desc_struct *, 123 void (*write_gdt_entry)(struct desc_struct *,
@@ -141,8 +144,32 @@ struct pv_cpu_ops {
141 u64 (*read_pmc)(int counter); 144 u64 (*read_pmc)(int counter);
142 unsigned long long (*read_tscp)(unsigned int *aux); 145 unsigned long long (*read_tscp)(unsigned int *aux);
143 146
144 /* These two are jmp to, not actually called. */ 147 /*
145 void (*irq_enable_syscall_ret)(void); 148 * Atomically enable interrupts and return to userspace. This
149 * is only ever used to return to 32-bit processes; in a
150 * 64-bit kernel, it's used for 32-on-64 compat processes, but
151 * never native 64-bit processes. (Jump, not call.)
152 */
153 void (*irq_enable_sysexit)(void);
154
155 /*
156 * Switch to usermode gs and return to 64-bit usermode using
157 * sysret. Only used in 64-bit kernels to return to 64-bit
158 * processes. Usermode register state, including %rsp, must
159 * already be restored.
160 */
161 void (*usergs_sysret64)(void);
162
163 /*
164 * Switch to usermode gs and return to 32-bit usermode using
165 * sysret. Used to return to 32-on-64 compat processes.
166 * Other usermode register state, including %esp, must already
167 * be restored.
168 */
169 void (*usergs_sysret32)(void);
170
171 /* Normal iret. Jump to this with the standard iret stack
172 frame set up. */
146 void (*iret)(void); 173 void (*iret)(void);
147 174
148 void (*swapgs)(void); 175 void (*swapgs)(void);
@@ -165,6 +192,10 @@ struct pv_irq_ops {
165 void (*irq_enable)(void); 192 void (*irq_enable)(void);
166 void (*safe_halt)(void); 193 void (*safe_halt)(void);
167 void (*halt)(void); 194 void (*halt)(void);
195
196#ifdef CONFIG_X86_64
197 void (*adjust_exception_frame)(void);
198#endif
168}; 199};
169 200
170struct pv_apic_ops { 201struct pv_apic_ops {
@@ -219,7 +250,14 @@ struct pv_mmu_ops {
219 void (*flush_tlb_others)(const cpumask_t *cpus, struct mm_struct *mm, 250 void (*flush_tlb_others)(const cpumask_t *cpus, struct mm_struct *mm,
220 unsigned long va); 251 unsigned long va);
221 252
222 /* Hooks for allocating/releasing pagetable pages */ 253 /* Hooks for allocating and freeing a pagetable top-level */
254 int (*pgd_alloc)(struct mm_struct *mm);
255 void (*pgd_free)(struct mm_struct *mm, pgd_t *pgd);
256
257 /*
258 * Hooks for allocating/releasing pagetable pages when they're
259 * attached to a pagetable
260 */
223 void (*alloc_pte)(struct mm_struct *mm, u32 pfn); 261 void (*alloc_pte)(struct mm_struct *mm, u32 pfn);
224 void (*alloc_pmd)(struct mm_struct *mm, u32 pfn); 262 void (*alloc_pmd)(struct mm_struct *mm, u32 pfn);
225 void (*alloc_pmd_clone)(u32 pfn, u32 clonepfn, u32 start, u32 count); 263 void (*alloc_pmd_clone)(u32 pfn, u32 clonepfn, u32 start, u32 count);
@@ -238,7 +276,13 @@ struct pv_mmu_ops {
238 void (*pte_update_defer)(struct mm_struct *mm, 276 void (*pte_update_defer)(struct mm_struct *mm,
239 unsigned long addr, pte_t *ptep); 277 unsigned long addr, pte_t *ptep);
240 278
279 pte_t (*ptep_modify_prot_start)(struct mm_struct *mm, unsigned long addr,
280 pte_t *ptep);
281 void (*ptep_modify_prot_commit)(struct mm_struct *mm, unsigned long addr,
282 pte_t *ptep, pte_t pte);
283
241 pteval_t (*pte_val)(pte_t); 284 pteval_t (*pte_val)(pte_t);
285 pteval_t (*pte_flags)(pte_t);
242 pte_t (*make_pte)(pteval_t pte); 286 pte_t (*make_pte)(pteval_t pte);
243 287
244 pgdval_t (*pgd_val)(pgd_t); 288 pgdval_t (*pgd_val)(pgd_t);
@@ -273,6 +317,13 @@ struct pv_mmu_ops {
273#endif 317#endif
274 318
275 struct pv_lazy_ops lazy_mode; 319 struct pv_lazy_ops lazy_mode;
320
321 /* dom0 ops */
322
323 /* Sometimes the physical address is a pfn, and sometimes it's
324 an mfn. We can tell which is which from the index. */
325 void (*set_fixmap)(unsigned /* enum fixed_addresses */ idx,
326 unsigned long phys, pgprot_t flags);
276}; 327};
277 328
278/* This contains all the paravirt structures: we get a convenient 329/* This contains all the paravirt structures: we get a convenient
@@ -439,10 +490,17 @@ int paravirt_disable_iospace(void);
439#define VEXTRA_CLOBBERS , "rax", "r8", "r9", "r10", "r11" 490#define VEXTRA_CLOBBERS , "rax", "r8", "r9", "r10", "r11"
440#endif 491#endif
441 492
493#ifdef CONFIG_PARAVIRT_DEBUG
494#define PVOP_TEST_NULL(op) BUG_ON(op == NULL)
495#else
496#define PVOP_TEST_NULL(op) ((void)op)
497#endif
498
442#define __PVOP_CALL(rettype, op, pre, post, ...) \ 499#define __PVOP_CALL(rettype, op, pre, post, ...) \
443 ({ \ 500 ({ \
444 rettype __ret; \ 501 rettype __ret; \
445 PVOP_CALL_ARGS; \ 502 PVOP_CALL_ARGS; \
503 PVOP_TEST_NULL(op); \
446 /* This is 32-bit specific, but is okay in 64-bit */ \ 504 /* This is 32-bit specific, but is okay in 64-bit */ \
447 /* since this condition will never hold */ \ 505 /* since this condition will never hold */ \
448 if (sizeof(rettype) > sizeof(unsigned long)) { \ 506 if (sizeof(rettype) > sizeof(unsigned long)) { \
@@ -471,6 +529,7 @@ int paravirt_disable_iospace(void);
471#define __PVOP_VCALL(op, pre, post, ...) \ 529#define __PVOP_VCALL(op, pre, post, ...) \
472 ({ \ 530 ({ \
473 PVOP_VCALL_ARGS; \ 531 PVOP_VCALL_ARGS; \
532 PVOP_TEST_NULL(op); \
474 asm volatile(pre \ 533 asm volatile(pre \
475 paravirt_alt(PARAVIRT_CALL) \ 534 paravirt_alt(PARAVIRT_CALL) \
476 post \ 535 post \
@@ -720,7 +779,7 @@ static inline unsigned long long paravirt_sched_clock(void)
720{ 779{
721 return PVOP_CALL0(unsigned long long, pv_time_ops.sched_clock); 780 return PVOP_CALL0(unsigned long long, pv_time_ops.sched_clock);
722} 781}
723#define calculate_cpu_khz() (pv_time_ops.get_cpu_khz()) 782#define calibrate_tsc() (pv_time_ops.get_tsc_khz())
724 783
725static inline unsigned long long paravirt_read_pmc(int counter) 784static inline unsigned long long paravirt_read_pmc(int counter)
726{ 785{
@@ -789,6 +848,13 @@ static inline void load_TLS(struct thread_struct *t, unsigned cpu)
789 PVOP_VCALL2(pv_cpu_ops.load_tls, t, cpu); 848 PVOP_VCALL2(pv_cpu_ops.load_tls, t, cpu);
790} 849}
791 850
851#ifdef CONFIG_X86_64
852static inline void load_gs_index(unsigned int gs)
853{
854 PVOP_VCALL1(pv_cpu_ops.load_gs_index, gs);
855}
856#endif
857
792static inline void write_ldt_entry(struct desc_struct *dt, int entry, 858static inline void write_ldt_entry(struct desc_struct *dt, int entry,
793 const void *desc) 859 const void *desc)
794{ 860{
@@ -912,6 +978,16 @@ static inline void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
912 PVOP_VCALL3(pv_mmu_ops.flush_tlb_others, &cpumask, mm, va); 978 PVOP_VCALL3(pv_mmu_ops.flush_tlb_others, &cpumask, mm, va);
913} 979}
914 980
981static inline int paravirt_pgd_alloc(struct mm_struct *mm)
982{
983 return PVOP_CALL1(int, pv_mmu_ops.pgd_alloc, mm);
984}
985
986static inline void paravirt_pgd_free(struct mm_struct *mm, pgd_t *pgd)
987{
988 PVOP_VCALL2(pv_mmu_ops.pgd_free, mm, pgd);
989}
990
915static inline void paravirt_alloc_pte(struct mm_struct *mm, unsigned pfn) 991static inline void paravirt_alloc_pte(struct mm_struct *mm, unsigned pfn)
916{ 992{
917 PVOP_VCALL2(pv_mmu_ops.alloc_pte, mm, pfn); 993 PVOP_VCALL2(pv_mmu_ops.alloc_pte, mm, pfn);
@@ -996,6 +1072,20 @@ static inline pteval_t pte_val(pte_t pte)
996 return ret; 1072 return ret;
997} 1073}
998 1074
1075static inline pteval_t pte_flags(pte_t pte)
1076{
1077 pteval_t ret;
1078
1079 if (sizeof(pteval_t) > sizeof(long))
1080 ret = PVOP_CALL2(pteval_t, pv_mmu_ops.pte_flags,
1081 pte.pte, (u64)pte.pte >> 32);
1082 else
1083 ret = PVOP_CALL1(pteval_t, pv_mmu_ops.pte_flags,
1084 pte.pte);
1085
1086 return ret;
1087}
1088
999static inline pgd_t __pgd(pgdval_t val) 1089static inline pgd_t __pgd(pgdval_t val)
1000{ 1090{
1001 pgdval_t ret; 1091 pgdval_t ret;
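The point of a separate pte_flags op: under a hypervisor such as Xen, pte_val() must translate the frame number back from machine to pseudo-physical, but the low flag bits are never translated, so pte_flags() can remain the cheap native accessor. A sketch of the expected wiring (xen_pte_val is assumed here as the translating variant):

    pv_mmu_ops.pte_val   = xen_pte_val;     /* translates mfn -> pfn */
    pv_mmu_ops.pte_flags = native_pte_val;  /* flags pass through untouched */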
@@ -1024,6 +1114,29 @@ static inline pgdval_t pgd_val(pgd_t pgd)
1024 return ret; 1114 return ret;
1025} 1115}
1026 1116
1117#define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
1118static inline pte_t ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr,
1119 pte_t *ptep)
1120{
1121 pteval_t ret;
1122
1123 ret = PVOP_CALL3(pteval_t, pv_mmu_ops.ptep_modify_prot_start,
1124 mm, addr, ptep);
1125
1126 return (pte_t) { .pte = ret };
1127}
1128
1129static inline void ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
1130 pte_t *ptep, pte_t pte)
1131{
1132 if (sizeof(pteval_t) > sizeof(long))
1133 /* 5 arg words */
1134 pv_mmu_ops.ptep_modify_prot_commit(mm, addr, ptep, pte);
1135 else
1136 PVOP_VCALL4(pv_mmu_ops.ptep_modify_prot_commit,
1137 mm, addr, ptep, pte.pte);
1138}
1139
1027static inline void set_pte(pte_t *ptep, pte_t pte) 1140static inline void set_pte(pte_t *ptep, pte_t pte)
1028{ 1141{
1029 if (sizeof(pteval_t) > sizeof(long)) 1142 if (sizeof(pteval_t) > sizeof(long))
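The intended calling pattern for the new transaction pair, as generic mm code would use it (a sketch; locking and the surrounding vma context are omitted):

    pte_t pte;

    pte = ptep_modify_prot_start(mm, addr, ptep);  /* logically read (and detach) */
    pte = pte_modify(pte, newprot);                /* recompute protection bits   */
    ptep_modify_prot_commit(mm, addr, ptep, pte);  /* install the updated pte     */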
@@ -1252,6 +1365,12 @@ static inline void arch_flush_lazy_mmu_mode(void)
1252 } 1365 }
1253} 1366}
1254 1367
1368static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx,
1369 unsigned long phys, pgprot_t flags)
1370{
1371 pv_mmu_ops.set_fixmap(idx, phys, flags);
1372}
1373
1255void _paravirt_nop(void); 1374void _paravirt_nop(void);
1256#define paravirt_nop ((void *)_paravirt_nop) 1375#define paravirt_nop ((void *)_paravirt_nop)
1257 1376
@@ -1374,54 +1493,86 @@ static inline unsigned long __raw_local_irq_save(void)
1374#define PV_RESTORE_REGS popq %rdx; popq %rcx; popq %rdi; popq %rax 1493#define PV_RESTORE_REGS popq %rdx; popq %rcx; popq %rdi; popq %rax
1375#define PARA_PATCH(struct, off) ((PARAVIRT_PATCH_##struct + (off)) / 8) 1494#define PARA_PATCH(struct, off) ((PARAVIRT_PATCH_##struct + (off)) / 8)
1376#define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .quad, 8) 1495#define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .quad, 8)
1496#define PARA_INDIRECT(addr) *addr(%rip)
1377#else 1497#else
1378#define PV_SAVE_REGS pushl %eax; pushl %edi; pushl %ecx; pushl %edx 1498#define PV_SAVE_REGS pushl %eax; pushl %edi; pushl %ecx; pushl %edx
1379#define PV_RESTORE_REGS popl %edx; popl %ecx; popl %edi; popl %eax 1499#define PV_RESTORE_REGS popl %edx; popl %ecx; popl %edi; popl %eax
1380#define PARA_PATCH(struct, off) ((PARAVIRT_PATCH_##struct + (off)) / 4) 1500#define PARA_PATCH(struct, off) ((PARAVIRT_PATCH_##struct + (off)) / 4)
1381#define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .long, 4) 1501#define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .long, 4)
1502#define PARA_INDIRECT(addr) *%cs:addr
1382#endif 1503#endif
1383 1504
1384#define INTERRUPT_RETURN \ 1505#define INTERRUPT_RETURN \
1385 PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_iret), CLBR_NONE, \ 1506 PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_iret), CLBR_NONE, \
1386 jmp *%cs:pv_cpu_ops+PV_CPU_iret) 1507 jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_iret))
1387 1508
1388#define DISABLE_INTERRUPTS(clobbers) \ 1509#define DISABLE_INTERRUPTS(clobbers) \
1389 PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_disable), clobbers, \ 1510 PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_disable), clobbers, \
1390 PV_SAVE_REGS; \ 1511 PV_SAVE_REGS; \
1391 call *%cs:pv_irq_ops+PV_IRQ_irq_disable; \ 1512 call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_disable); \
1392 PV_RESTORE_REGS;) \ 1513 PV_RESTORE_REGS;) \
1393 1514
1394#define ENABLE_INTERRUPTS(clobbers) \ 1515#define ENABLE_INTERRUPTS(clobbers) \
1395 PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_enable), clobbers, \ 1516 PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_enable), clobbers, \
1396 PV_SAVE_REGS; \ 1517 PV_SAVE_REGS; \
1397 call *%cs:pv_irq_ops+PV_IRQ_irq_enable; \ 1518 call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_enable); \
1398 PV_RESTORE_REGS;) 1519 PV_RESTORE_REGS;)
1399 1520
1400#define ENABLE_INTERRUPTS_SYSCALL_RET \ 1521#define USERGS_SYSRET32 \
1401 PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_syscall_ret),\ 1522 PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret32), \
1402 CLBR_NONE, \ 1523 CLBR_NONE, \
1403 jmp *%cs:pv_cpu_ops+PV_CPU_irq_enable_syscall_ret) 1524 jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret32))
1404
1405 1525
1406#ifdef CONFIG_X86_32 1526#ifdef CONFIG_X86_32
1407#define GET_CR0_INTO_EAX \ 1527#define GET_CR0_INTO_EAX \
1408 push %ecx; push %edx; \ 1528 push %ecx; push %edx; \
1409 call *pv_cpu_ops+PV_CPU_read_cr0; \ 1529 call PARA_INDIRECT(pv_cpu_ops+PV_CPU_read_cr0); \
1410 pop %edx; pop %ecx 1530 pop %edx; pop %ecx
1411#else 1531
1532#define ENABLE_INTERRUPTS_SYSEXIT \
1533 PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_sysexit), \
1534 CLBR_NONE, \
1535 jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_irq_enable_sysexit))
1536
1537
1538#else /* !CONFIG_X86_32 */
1539
1540/*
1541 * If swapgs is used while the userspace stack is still current,
1542 * there's no way to call a pvop. The PV replacement *must* be
1543 * inlined, or the swapgs instruction must be trapped and emulated.
1544 */
1545#define SWAPGS_UNSAFE_STACK \
1546 PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE, \
1547 swapgs)
1548
1412#define SWAPGS \ 1549#define SWAPGS \
1413 PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE, \ 1550 PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE, \
1414 PV_SAVE_REGS; \ 1551 PV_SAVE_REGS; \
1415 call *pv_cpu_ops+PV_CPU_swapgs; \ 1552 call PARA_INDIRECT(pv_cpu_ops+PV_CPU_swapgs); \
1416 PV_RESTORE_REGS \ 1553 PV_RESTORE_REGS \
1417 ) 1554 )
1418 1555
1419#define GET_CR2_INTO_RCX \ 1556#define GET_CR2_INTO_RCX \
1420 call *pv_mmu_ops+PV_MMU_read_cr2; \ 1557 call PARA_INDIRECT(pv_mmu_ops+PV_MMU_read_cr2); \
1421 movq %rax, %rcx; \ 1558 movq %rax, %rcx; \
1422 xorq %rax, %rax; 1559 xorq %rax, %rax;
1423 1560
1424#endif 1561#define PARAVIRT_ADJUST_EXCEPTION_FRAME \
1562 PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_adjust_exception_frame), \
1563 CLBR_NONE, \
1564 call PARA_INDIRECT(pv_irq_ops+PV_IRQ_adjust_exception_frame))
1565
1566#define USERGS_SYSRET64 \
1567 PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64), \
1568 CLBR_NONE, \
1569 jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64))
1570
1571#define ENABLE_INTERRUPTS_SYSEXIT32 \
1572 PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_sysexit), \
1573 CLBR_NONE, \
1574 jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_irq_enable_sysexit))
1575#endif /* CONFIG_X86_32 */
1425 1576
1426#endif /* __ASSEMBLY__ */ 1577#endif /* __ASSEMBLY__ */
1427#endif /* CONFIG_PARAVIRT */ 1578#endif /* CONFIG_PARAVIRT */
diff --git a/include/asm-x86/pat.h b/include/asm-x86/pat.h
index 88f60cc6a227..7edc47307217 100644
--- a/include/asm-x86/pat.h
+++ b/include/asm-x86/pat.h
@@ -1,14 +1,13 @@
1
2#ifndef _ASM_PAT_H 1#ifndef _ASM_PAT_H
3#define _ASM_PAT_H 1 2#define _ASM_PAT_H
4 3
5#include <linux/types.h> 4#include <linux/types.h>
6 5
7#ifdef CONFIG_X86_PAT 6#ifdef CONFIG_X86_PAT
8extern int pat_wc_enabled; 7extern int pat_enabled;
9extern void validate_pat_support(struct cpuinfo_x86 *c); 8extern void validate_pat_support(struct cpuinfo_x86 *c);
10#else 9#else
11static const int pat_wc_enabled = 0; 10static const int pat_enabled;
12static inline void validate_pat_support(struct cpuinfo_x86 *c) { } 11static inline void validate_pat_support(struct cpuinfo_x86 *c) { }
13#endif 12#endif
14 13
@@ -21,4 +20,3 @@ extern int free_memtype(u64 start, u64 end);
21extern void pat_disable(char *reason); 20extern void pat_disable(char *reason);
22 21
23#endif 22#endif
24
diff --git a/include/asm-x86/pci.h b/include/asm-x86/pci.h
index 30bbde0cb34b..2db14cf17db8 100644
--- a/include/asm-x86/pci.h
+++ b/include/asm-x86/pci.h
@@ -18,6 +18,8 @@ struct pci_sysdata {
18#endif 18#endif
19}; 19};
20 20
21extern int pci_routeirq;
22
21/* scan a bus after allocating a pci_sysdata for it */ 23/* scan a bus after allocating a pci_sysdata for it */
22extern struct pci_bus *pci_scan_bus_on_node(int busno, struct pci_ops *ops, 24extern struct pci_bus *pci_scan_bus_on_node(int busno, struct pci_ops *ops,
23 int node); 25 int node);
diff --git a/include/asm-x86/pci_32.h b/include/asm-x86/pci_32.h
index 8c4c3a0368e2..a50d46851285 100644
--- a/include/asm-x86/pci_32.h
+++ b/include/asm-x86/pci_32.h
@@ -18,12 +18,14 @@ struct pci_dev;
18#define PCI_DMA_BUS_IS_PHYS (1) 18#define PCI_DMA_BUS_IS_PHYS (1)
19 19
20/* pci_unmap_{page,single} is a nop so... */ 20/* pci_unmap_{page,single} is a nop so... */
21#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) 21#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) dma_addr_t ADDR_NAME[0];
22#define DECLARE_PCI_UNMAP_LEN(LEN_NAME) 22#define DECLARE_PCI_UNMAP_LEN(LEN_NAME) unsigned LEN_NAME[0];
23#define pci_unmap_addr(PTR, ADDR_NAME) (0) 23#define pci_unmap_addr(PTR, ADDR_NAME) sizeof((PTR)->ADDR_NAME)
24#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL) do { } while (0) 24#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL) \
25#define pci_unmap_len(PTR, LEN_NAME) (0) 25 do { break; } while (pci_unmap_addr(PTR, ADDR_NAME))
26#define pci_unmap_len_set(PTR, LEN_NAME, VAL) do { } while (0) 26#define pci_unmap_len(PTR, LEN_NAME) sizeof((PTR)->LEN_NAME)
27#define pci_unmap_len_set(PTR, LEN_NAME, VAL) \
28 do { break; } while (pci_unmap_len(PTR, LEN_NAME))
27 29
28 30
29#endif /* __KERNEL__ */ 31#endif /* __KERNEL__ */
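The replacements stay no-ops but now type-check their arguments: the zero-length array members occupy no space, sizeof on them is 0, and do { break; } while (expr) compiles expr without ever evaluating it at run time. A hypothetical driver struct shows the effect:

    struct ring_desc {
            void *cpu_addr;
            DECLARE_PCI_UNMAP_ADDR(mapping)         /* zero bytes here */
    };

    /* Generates no code, but now fails to build if 'mapping' is absent
     * or misspelled -- the old empty macros silently accepted anything. */
    pci_unmap_addr_set(desc, mapping, dma_handle);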
diff --git a/include/asm-x86/pda.h b/include/asm-x86/pda.h
index 101fb9e11954..b34e9a7cc80b 100644
--- a/include/asm-x86/pda.h
+++ b/include/asm-x86/pda.h
@@ -22,6 +22,8 @@ struct x8664_pda {
22 offset 40!!! */ 22 offset 40!!! */
23#endif 23#endif
24 char *irqstackptr; 24 char *irqstackptr;
25 short nodenumber; /* number of current node (32k max) */
26 short in_bootmem; /* pda lives in bootmem */
25 unsigned int __softirq_pending; 27 unsigned int __softirq_pending;
26 unsigned int __nmi_count; /* number of NMI on this CPU */ 28 unsigned int __nmi_count; /* number of NMI on this CPU */
27 short mmu_state; 29 short mmu_state;
@@ -37,8 +39,7 @@ struct x8664_pda {
37 unsigned irq_spurious_count; 39 unsigned irq_spurious_count;
38} ____cacheline_aligned_in_smp; 40} ____cacheline_aligned_in_smp;
39 41
40extern struct x8664_pda *_cpu_pda[]; 42extern struct x8664_pda **_cpu_pda;
41extern struct x8664_pda boot_cpu_pda[];
42extern void pda_init(int); 43extern void pda_init(int);
43 44
44#define cpu_pda(i) (_cpu_pda[i]) 45#define cpu_pda(i) (_cpu_pda[i])
diff --git a/include/asm-x86/percpu.h b/include/asm-x86/percpu.h
index 736fc3bb8e1e..912a3a17b9db 100644
--- a/include/asm-x86/percpu.h
+++ b/include/asm-x86/percpu.h
@@ -143,4 +143,50 @@ do { \
143#define x86_or_percpu(var, val) percpu_to_op("or", per_cpu__##var, val) 143#define x86_or_percpu(var, val) percpu_to_op("or", per_cpu__##var, val)
144#endif /* !__ASSEMBLY__ */ 144#endif /* !__ASSEMBLY__ */
145#endif /* !CONFIG_X86_64 */ 145#endif /* !CONFIG_X86_64 */
146
147#ifdef CONFIG_SMP
148
149/*
150 * Define the "EARLY_PER_CPU" macros. These are used for some per_cpu
151 * variables that are initialized and accessed before there are per_cpu
152 * areas allocated.
153 */
154
155#define DEFINE_EARLY_PER_CPU(_type, _name, _initvalue) \
156 DEFINE_PER_CPU(_type, _name) = _initvalue; \
157 __typeof__(_type) _name##_early_map[NR_CPUS] __initdata = \
158 { [0 ... NR_CPUS-1] = _initvalue }; \
159 __typeof__(_type) *_name##_early_ptr = _name##_early_map
160
161#define EXPORT_EARLY_PER_CPU_SYMBOL(_name) \
162 EXPORT_PER_CPU_SYMBOL(_name)
163
164#define DECLARE_EARLY_PER_CPU(_type, _name) \
165 DECLARE_PER_CPU(_type, _name); \
166 extern __typeof__(_type) *_name##_early_ptr; \
167 extern __typeof__(_type) _name##_early_map[]
168
169#define early_per_cpu_ptr(_name) (_name##_early_ptr)
170#define early_per_cpu_map(_name, _idx) (_name##_early_map[_idx])
171#define early_per_cpu(_name, _cpu) \
172 (early_per_cpu_ptr(_name) ? \
173 early_per_cpu_ptr(_name)[_cpu] : \
174 per_cpu(_name, _cpu))
175
176#else /* !CONFIG_SMP */
177#define DEFINE_EARLY_PER_CPU(_type, _name, _initvalue) \
178 DEFINE_PER_CPU(_type, _name) = _initvalue
179
180#define EXPORT_EARLY_PER_CPU_SYMBOL(_name) \
181 EXPORT_PER_CPU_SYMBOL(_name)
182
183#define DECLARE_EARLY_PER_CPU(_type, _name) \
184 DECLARE_PER_CPU(_type, _name)
185
186#define early_per_cpu(_name, _cpu) per_cpu(_name, _cpu)
187#define early_per_cpu_ptr(_name) NULL
188/* no early_per_cpu_map() */
189
190#endif /* !CONFIG_SMP */
191
146#endif /* _ASM_X86_PERCPU_H_ */ 192#endif /* _ASM_X86_PERCPU_H_ */
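Typical use of the new macros, matching how the per-cpu APIC-ID tables are converted later in this series (shown as an assumed example):

    DEFINE_EARLY_PER_CPU(u16, x86_cpu_to_apicid, BAD_APICID);

    /* Early in boot the per_cpu areas do not exist yet, so the read is
     * redirected through the __initdata x86_cpu_to_apicid_early_map;
     * once setup NULLs x86_cpu_to_apicid_early_ptr, the same expression
     * reads the real per-cpu variable. */
    u16 apicid = early_per_cpu(x86_cpu_to_apicid, cpu);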
diff --git a/include/asm-x86/pgalloc.h b/include/asm-x86/pgalloc.h
index 91e4641f3f31..d63ea431cb3b 100644
--- a/include/asm-x86/pgalloc.h
+++ b/include/asm-x86/pgalloc.h
@@ -5,9 +5,13 @@
5#include <linux/mm.h> /* for struct page */ 5#include <linux/mm.h> /* for struct page */
6#include <linux/pagemap.h> 6#include <linux/pagemap.h>
7 7
8static inline int __paravirt_pgd_alloc(struct mm_struct *mm) { return 0; }
9
8#ifdef CONFIG_PARAVIRT 10#ifdef CONFIG_PARAVIRT
9#include <asm/paravirt.h> 11#include <asm/paravirt.h>
10#else 12#else
13#define paravirt_pgd_alloc(mm) __paravirt_pgd_alloc(mm)
14static inline void paravirt_pgd_free(struct mm_struct *mm, pgd_t *pgd) {}
11static inline void paravirt_alloc_pte(struct mm_struct *mm, unsigned long pfn) {} 15static inline void paravirt_alloc_pte(struct mm_struct *mm, unsigned long pfn) {}
12static inline void paravirt_alloc_pmd(struct mm_struct *mm, unsigned long pfn) {} 16static inline void paravirt_alloc_pmd(struct mm_struct *mm, unsigned long pfn) {}
13static inline void paravirt_alloc_pmd_clone(unsigned long pfn, unsigned long clonepfn, 17static inline void paravirt_alloc_pmd_clone(unsigned long pfn, unsigned long clonepfn,
diff --git a/include/asm-x86/pgtable.h b/include/asm-x86/pgtable.h
index 97c271b2910b..49cbd76b9547 100644
--- a/include/asm-x86/pgtable.h
+++ b/include/asm-x86/pgtable.h
@@ -20,30 +20,25 @@
20#define _PAGE_BIT_PAT_LARGE 12 /* On 2MB or 1GB pages */ 20#define _PAGE_BIT_PAT_LARGE 12 /* On 2MB or 1GB pages */
21#define _PAGE_BIT_NX 63 /* No execute: only valid after cpuid check */ 21#define _PAGE_BIT_NX 63 /* No execute: only valid after cpuid check */
22 22
23/* 23#define _PAGE_PRESENT (_AT(pteval_t, 1) << _PAGE_BIT_PRESENT)
24 * Note: we use _AC(1, L) instead of _AC(1, UL) so that we get a 24#define _PAGE_RW (_AT(pteval_t, 1) << _PAGE_BIT_RW)
25 * sign-extended value on 32-bit with all 1's in the upper word, 25#define _PAGE_USER (_AT(pteval_t, 1) << _PAGE_BIT_USER)
26 * which preserves the upper pte values on 64-bit ptes: 26#define _PAGE_PWT (_AT(pteval_t, 1) << _PAGE_BIT_PWT)
27 */ 27#define _PAGE_PCD (_AT(pteval_t, 1) << _PAGE_BIT_PCD)
28#define _PAGE_PRESENT (_AC(1, L)<<_PAGE_BIT_PRESENT) 28#define _PAGE_ACCESSED (_AT(pteval_t, 1) << _PAGE_BIT_ACCESSED)
29#define _PAGE_RW (_AC(1, L)<<_PAGE_BIT_RW) 29#define _PAGE_DIRTY (_AT(pteval_t, 1) << _PAGE_BIT_DIRTY)
30#define _PAGE_USER (_AC(1, L)<<_PAGE_BIT_USER) 30#define _PAGE_PSE (_AT(pteval_t, 1) << _PAGE_BIT_PSE)
31#define _PAGE_PWT (_AC(1, L)<<_PAGE_BIT_PWT) 31#define _PAGE_GLOBAL (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL)
32#define _PAGE_PCD (_AC(1, L)<<_PAGE_BIT_PCD) 32#define _PAGE_UNUSED1 (_AT(pteval_t, 1) << _PAGE_BIT_UNUSED1)
33#define _PAGE_ACCESSED (_AC(1, L)<<_PAGE_BIT_ACCESSED) 33#define _PAGE_UNUSED2 (_AT(pteval_t, 1) << _PAGE_BIT_UNUSED2)
34#define _PAGE_DIRTY (_AC(1, L)<<_PAGE_BIT_DIRTY) 34#define _PAGE_UNUSED3 (_AT(pteval_t, 1) << _PAGE_BIT_UNUSED3)
35#define _PAGE_PSE (_AC(1, L)<<_PAGE_BIT_PSE) /* 2MB page */ 35#define _PAGE_PAT (_AT(pteval_t, 1) << _PAGE_BIT_PAT)
36#define _PAGE_GLOBAL (_AC(1, L)<<_PAGE_BIT_GLOBAL) /* Global TLB entry */ 36#define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE)
37#define _PAGE_UNUSED1 (_AC(1, L)<<_PAGE_BIT_UNUSED1)
38#define _PAGE_UNUSED2 (_AC(1, L)<<_PAGE_BIT_UNUSED2)
39#define _PAGE_UNUSED3 (_AC(1, L)<<_PAGE_BIT_UNUSED3)
40#define _PAGE_PAT (_AC(1, L)<<_PAGE_BIT_PAT)
41#define _PAGE_PAT_LARGE (_AC(1, L)<<_PAGE_BIT_PAT_LARGE)
42 37
43#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) 38#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
44#define _PAGE_NX (_AC(1, ULL) << _PAGE_BIT_NX) 39#define _PAGE_NX (_AT(pteval_t, 1) << _PAGE_BIT_NX)
45#else 40#else
46#define _PAGE_NX 0 41#define _PAGE_NX (_AT(pteval_t, 0))
47#endif 42#endif
48 43
49/* If _PAGE_PRESENT is clear, we use these: */ 44/* If _PAGE_PRESENT is clear, we use these: */
@@ -83,19 +78,9 @@
83#define PAGE_READONLY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \ 78#define PAGE_READONLY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \
84 _PAGE_ACCESSED) 79 _PAGE_ACCESSED)
85 80
86#ifdef CONFIG_X86_32
87#define _PAGE_KERNEL_EXEC \
88 (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED)
89#define _PAGE_KERNEL (_PAGE_KERNEL_EXEC | _PAGE_NX)
90
91#ifndef __ASSEMBLY__
92extern pteval_t __PAGE_KERNEL, __PAGE_KERNEL_EXEC;
93#endif /* __ASSEMBLY__ */
94#else
95#define __PAGE_KERNEL_EXEC \ 81#define __PAGE_KERNEL_EXEC \
96 (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED) 82 (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_GLOBAL)
97#define __PAGE_KERNEL (__PAGE_KERNEL_EXEC | _PAGE_NX) 83#define __PAGE_KERNEL (__PAGE_KERNEL_EXEC | _PAGE_NX)
98#endif
99 84
100#define __PAGE_KERNEL_RO (__PAGE_KERNEL & ~_PAGE_RW) 85#define __PAGE_KERNEL_RO (__PAGE_KERNEL & ~_PAGE_RW)
101#define __PAGE_KERNEL_RX (__PAGE_KERNEL_EXEC & ~_PAGE_RW) 86#define __PAGE_KERNEL_RX (__PAGE_KERNEL_EXEC & ~_PAGE_RW)
@@ -106,26 +91,22 @@ extern pteval_t __PAGE_KERNEL, __PAGE_KERNEL_EXEC;
106#define __PAGE_KERNEL_VSYSCALL (__PAGE_KERNEL_RX | _PAGE_USER) 91#define __PAGE_KERNEL_VSYSCALL (__PAGE_KERNEL_RX | _PAGE_USER)
107#define __PAGE_KERNEL_VSYSCALL_NOCACHE (__PAGE_KERNEL_VSYSCALL | _PAGE_PCD | _PAGE_PWT) 92#define __PAGE_KERNEL_VSYSCALL_NOCACHE (__PAGE_KERNEL_VSYSCALL | _PAGE_PCD | _PAGE_PWT)
108#define __PAGE_KERNEL_LARGE (__PAGE_KERNEL | _PAGE_PSE) 93#define __PAGE_KERNEL_LARGE (__PAGE_KERNEL | _PAGE_PSE)
94#define __PAGE_KERNEL_LARGE_NOCACHE (__PAGE_KERNEL | _PAGE_CACHE_UC | _PAGE_PSE)
109#define __PAGE_KERNEL_LARGE_EXEC (__PAGE_KERNEL_EXEC | _PAGE_PSE) 95#define __PAGE_KERNEL_LARGE_EXEC (__PAGE_KERNEL_EXEC | _PAGE_PSE)
110 96
111#ifdef CONFIG_X86_32 97#define PAGE_KERNEL __pgprot(__PAGE_KERNEL)
112# define MAKE_GLOBAL(x) __pgprot((x)) 98#define PAGE_KERNEL_RO __pgprot(__PAGE_KERNEL_RO)
113#else 99#define PAGE_KERNEL_EXEC __pgprot(__PAGE_KERNEL_EXEC)
114# define MAKE_GLOBAL(x) __pgprot((x) | _PAGE_GLOBAL) 100#define PAGE_KERNEL_RX __pgprot(__PAGE_KERNEL_RX)
115#endif 101#define PAGE_KERNEL_WC __pgprot(__PAGE_KERNEL_WC)
116 102#define PAGE_KERNEL_NOCACHE __pgprot(__PAGE_KERNEL_NOCACHE)
117#define PAGE_KERNEL MAKE_GLOBAL(__PAGE_KERNEL) 103#define PAGE_KERNEL_UC_MINUS __pgprot(__PAGE_KERNEL_UC_MINUS)
118#define PAGE_KERNEL_RO MAKE_GLOBAL(__PAGE_KERNEL_RO) 104#define PAGE_KERNEL_EXEC_NOCACHE __pgprot(__PAGE_KERNEL_EXEC_NOCACHE)
119#define PAGE_KERNEL_EXEC MAKE_GLOBAL(__PAGE_KERNEL_EXEC) 105#define PAGE_KERNEL_LARGE __pgprot(__PAGE_KERNEL_LARGE)
120#define PAGE_KERNEL_RX MAKE_GLOBAL(__PAGE_KERNEL_RX) 106#define PAGE_KERNEL_LARGE_NOCACHE __pgprot(__PAGE_KERNEL_LARGE_NOCACHE)
121#define PAGE_KERNEL_WC MAKE_GLOBAL(__PAGE_KERNEL_WC) 107#define PAGE_KERNEL_LARGE_EXEC __pgprot(__PAGE_KERNEL_LARGE_EXEC)
122#define PAGE_KERNEL_NOCACHE MAKE_GLOBAL(__PAGE_KERNEL_NOCACHE) 108#define PAGE_KERNEL_VSYSCALL __pgprot(__PAGE_KERNEL_VSYSCALL)
123#define PAGE_KERNEL_UC_MINUS MAKE_GLOBAL(__PAGE_KERNEL_UC_MINUS) 109#define PAGE_KERNEL_VSYSCALL_NOCACHE __pgprot(__PAGE_KERNEL_VSYSCALL_NOCACHE)
124#define PAGE_KERNEL_EXEC_NOCACHE MAKE_GLOBAL(__PAGE_KERNEL_EXEC_NOCACHE)
125#define PAGE_KERNEL_LARGE MAKE_GLOBAL(__PAGE_KERNEL_LARGE)
126#define PAGE_KERNEL_LARGE_EXEC MAKE_GLOBAL(__PAGE_KERNEL_LARGE_EXEC)
127#define PAGE_KERNEL_VSYSCALL MAKE_GLOBAL(__PAGE_KERNEL_VSYSCALL)
128#define PAGE_KERNEL_VSYSCALL_NOCACHE MAKE_GLOBAL(__PAGE_KERNEL_VSYSCALL_NOCACHE)
129 110
130/* xwr */ 111/* xwr */
131#define __P000 PAGE_NONE 112#define __P000 PAGE_NONE
@@ -164,37 +145,37 @@ extern struct list_head pgd_list;
164 */ 145 */
165static inline int pte_dirty(pte_t pte) 146static inline int pte_dirty(pte_t pte)
166{ 147{
167 return pte_val(pte) & _PAGE_DIRTY; 148 return pte_flags(pte) & _PAGE_DIRTY;
168} 149}
169 150
170static inline int pte_young(pte_t pte) 151static inline int pte_young(pte_t pte)
171{ 152{
172 return pte_val(pte) & _PAGE_ACCESSED; 153 return pte_flags(pte) & _PAGE_ACCESSED;
173} 154}
174 155
175static inline int pte_write(pte_t pte) 156static inline int pte_write(pte_t pte)
176{ 157{
177 return pte_val(pte) & _PAGE_RW; 158 return pte_flags(pte) & _PAGE_RW;
178} 159}
179 160
180static inline int pte_file(pte_t pte) 161static inline int pte_file(pte_t pte)
181{ 162{
182 return pte_val(pte) & _PAGE_FILE; 163 return pte_flags(pte) & _PAGE_FILE;
183} 164}
184 165
185static inline int pte_huge(pte_t pte) 166static inline int pte_huge(pte_t pte)
186{ 167{
187 return pte_val(pte) & _PAGE_PSE; 168 return pte_flags(pte) & _PAGE_PSE;
188} 169}
189 170
190static inline int pte_global(pte_t pte) 171static inline int pte_global(pte_t pte)
191{ 172{
192 return pte_val(pte) & _PAGE_GLOBAL; 173 return pte_flags(pte) & _PAGE_GLOBAL;
193} 174}
194 175
195static inline int pte_exec(pte_t pte) 176static inline int pte_exec(pte_t pte)
196{ 177{
197 return !(pte_val(pte) & _PAGE_NX); 178 return !(pte_flags(pte) & _PAGE_NX);
198} 179}
199 180
200static inline int pte_special(pte_t pte) 181static inline int pte_special(pte_t pte)
@@ -210,22 +191,22 @@ static inline int pmd_large(pmd_t pte)
210 191
211static inline pte_t pte_mkclean(pte_t pte) 192static inline pte_t pte_mkclean(pte_t pte)
212{ 193{
213 return __pte(pte_val(pte) & ~(pteval_t)_PAGE_DIRTY); 194 return __pte(pte_val(pte) & ~_PAGE_DIRTY);
214} 195}
215 196
216static inline pte_t pte_mkold(pte_t pte) 197static inline pte_t pte_mkold(pte_t pte)
217{ 198{
218 return __pte(pte_val(pte) & ~(pteval_t)_PAGE_ACCESSED); 199 return __pte(pte_val(pte) & ~_PAGE_ACCESSED);
219} 200}
220 201
221static inline pte_t pte_wrprotect(pte_t pte) 202static inline pte_t pte_wrprotect(pte_t pte)
222{ 203{
223 return __pte(pte_val(pte) & ~(pteval_t)_PAGE_RW); 204 return __pte(pte_val(pte) & ~_PAGE_RW);
224} 205}
225 206
226static inline pte_t pte_mkexec(pte_t pte) 207static inline pte_t pte_mkexec(pte_t pte)
227{ 208{
228 return __pte(pte_val(pte) & ~(pteval_t)_PAGE_NX); 209 return __pte(pte_val(pte) & ~_PAGE_NX);
229} 210}
230 211
231static inline pte_t pte_mkdirty(pte_t pte) 212static inline pte_t pte_mkdirty(pte_t pte)
@@ -250,7 +231,7 @@ static inline pte_t pte_mkhuge(pte_t pte)
250 231
251static inline pte_t pte_clrhuge(pte_t pte) 232static inline pte_t pte_clrhuge(pte_t pte)
252{ 233{
253 return __pte(pte_val(pte) & ~(pteval_t)_PAGE_PSE); 234 return __pte(pte_val(pte) & ~_PAGE_PSE);
254} 235}
255 236
256static inline pte_t pte_mkglobal(pte_t pte) 237static inline pte_t pte_mkglobal(pte_t pte)
@@ -260,7 +241,7 @@ static inline pte_t pte_mkglobal(pte_t pte)
260 241
261static inline pte_t pte_clrglobal(pte_t pte) 242static inline pte_t pte_clrglobal(pte_t pte)
262{ 243{
263 return __pte(pte_val(pte) & ~(pteval_t)_PAGE_GLOBAL); 244 return __pte(pte_val(pte) & ~_PAGE_GLOBAL);
264} 245}
265 246
266static inline pte_t pte_mkspecial(pte_t pte) 247static inline pte_t pte_mkspecial(pte_t pte)
@@ -305,7 +286,7 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
305 return __pgprot(preservebits | addbits); 286 return __pgprot(preservebits | addbits);
306} 287}
307 288
308#define pte_pgprot(x) __pgprot(pte_val(x) & ~PTE_MASK) 289#define pte_pgprot(x) __pgprot(pte_flags(x) & ~PTE_MASK)
309 290
310#define canon_pgprot(p) __pgprot(pgprot_val(p) & __supported_pte_mask) 291#define canon_pgprot(p) __pgprot(pgprot_val(p) & __supported_pte_mask)
311 292
@@ -318,6 +299,9 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
318 unsigned long size, pgprot_t *vma_prot); 299 unsigned long size, pgprot_t *vma_prot);
319#endif 300#endif
320 301
302/* Install a pte for a particular vaddr in kernel space. */
303void set_pte_vaddr(unsigned long vaddr, pte_t pte);
304
321#ifdef CONFIG_PARAVIRT 305#ifdef CONFIG_PARAVIRT
322#include <asm/paravirt.h> 306#include <asm/paravirt.h>
323#else /* !CONFIG_PARAVIRT */ 307#else /* !CONFIG_PARAVIRT */
@@ -359,6 +343,26 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
359# include "pgtable_64.h" 343# include "pgtable_64.h"
360#endif 344#endif
361 345
346/*
347 * the pgd page can be thought of as an array like this: pgd_t[PTRS_PER_PGD]
348 *
349 * this macro returns the index of the entry in the pgd page which would
350 * control the given virtual address
351 */
352#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))
353
354/*
355 * pgd_offset() returns a (pgd_t *)
356 * pgd_index() is used to get the offset into the pgd page's array of pgd_t's;
357 */
358#define pgd_offset(mm, address) ((mm)->pgd + pgd_index((address)))
359/*
360 * a shortcut which implies the use of the kernel's pgd, instead
361 * of a process's
362 */
363#define pgd_offset_k(address) pgd_offset(&init_mm, (address))
364
365
362#define KERNEL_PGD_BOUNDARY pgd_index(PAGE_OFFSET) 366#define KERNEL_PGD_BOUNDARY pgd_index(PAGE_OFFSET)
363#define KERNEL_PGD_PTRS (PTRS_PER_PGD - KERNEL_PGD_BOUNDARY) 367#define KERNEL_PGD_PTRS (PTRS_PER_PGD - KERNEL_PGD_BOUNDARY)
364 368
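A worked 64-bit example ties this to the __PAGE_OFFSET chosen in page_64.h above (PGDIR_SHIFT == 39, PTRS_PER_PGD == 512):

    /* pgd_index(0xffff880000000000)
     *   = (0xffff880000000000 >> 39) & 511
     *   = 272
     * i.e. the "pgd slot 272" mentioned in the __PAGE_OFFSET comment. */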
@@ -369,8 +373,15 @@ enum {
369 PG_LEVEL_4K, 373 PG_LEVEL_4K,
370 PG_LEVEL_2M, 374 PG_LEVEL_2M,
371 PG_LEVEL_1G, 375 PG_LEVEL_1G,
376 PG_LEVEL_NUM
372}; 377};
373 378
379#ifdef CONFIG_PROC_FS
380extern void update_page_count(int level, unsigned long pages);
381#else
382static inline void update_page_count(int level, unsigned long pages) { }
383#endif
384
374/* 385/*
375 * Helper function that returns the kernel pagetable entry controlling 386 * Helper function that returns the kernel pagetable entry controlling
376 * the virtual address 'address'. NULL means no pagetable entry present. 387 * the virtual address 'address'. NULL means no pagetable entry present.
@@ -420,6 +431,8 @@ static inline void native_set_pte_at(struct mm_struct *mm, unsigned long addr,
420 * race with other CPU's that might be updating the dirty 431 * race with other CPU's that might be updating the dirty
421 * bit at the same time. 432 * bit at the same time.
422 */ 433 */
434struct vm_area_struct;
435
423#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS 436#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
424extern int ptep_set_access_flags(struct vm_area_struct *vma, 437extern int ptep_set_access_flags(struct vm_area_struct *vma,
425 unsigned long address, pte_t *ptep, 438 unsigned long address, pte_t *ptep,
diff --git a/include/asm-x86/pgtable_32.h b/include/asm-x86/pgtable_32.h
index 32ca03109a4c..ec871c420d7e 100644
--- a/include/asm-x86/pgtable_32.h
+++ b/include/asm-x86/pgtable_32.h
@@ -113,26 +113,6 @@ extern unsigned long pg0[];
113 */ 113 */
114#define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot)) 114#define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot))
115 115
116/*
117 * the pgd page can be thought of an array like this: pgd_t[PTRS_PER_PGD]
118 *
119 * this macro returns the index of the entry in the pgd page which would
120 * control the given virtual address
121 */
122#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))
123#define pgd_index_k(addr) pgd_index((addr))
124
125/*
126 * pgd_offset() returns a (pgd_t *)
127 * pgd_index() is used get the offset into the pgd page's array of pgd_t's;
128 */
129#define pgd_offset(mm, address) ((mm)->pgd + pgd_index((address)))
130
131/*
132 * a shortcut which implies the use of the kernel's pgd, instead
133 * of a process's
134 */
135#define pgd_offset_k(address) pgd_offset(&init_mm, (address))
136 116
137static inline int pud_large(pud_t pud) { return 0; } 117static inline int pud_large(pud_t pud) { return 0; }
138 118
diff --git a/include/asm-x86/pgtable_64.h b/include/asm-x86/pgtable_64.h
index 1cc50d22d735..fa7208b483ca 100644
--- a/include/asm-x86/pgtable_64.h
+++ b/include/asm-x86/pgtable_64.h
@@ -70,6 +70,9 @@ extern void paging_init(void);
70 70
71struct mm_struct; 71struct mm_struct;
72 72
73void set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte);
74
75
73static inline void native_pte_clear(struct mm_struct *mm, unsigned long addr, 76static inline void native_pte_clear(struct mm_struct *mm, unsigned long addr,
74 pte_t *ptep) 77 pte_t *ptep)
75{ 78{
@@ -190,12 +193,9 @@ static inline int pmd_bad(pmd_t pmd)
190#define pgd_page_vaddr(pgd) \ 193#define pgd_page_vaddr(pgd) \
191 ((unsigned long)__va((unsigned long)pgd_val((pgd)) & PTE_MASK)) 194 ((unsigned long)__va((unsigned long)pgd_val((pgd)) & PTE_MASK))
192#define pgd_page(pgd) (pfn_to_page(pgd_val((pgd)) >> PAGE_SHIFT)) 195#define pgd_page(pgd) (pfn_to_page(pgd_val((pgd)) >> PAGE_SHIFT))
193#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))
194#define pgd_offset(mm, address) ((mm)->pgd + pgd_index((address)))
195#define pgd_offset_k(address) (init_level4_pgt + pgd_index((address)))
196#define pgd_present(pgd) (pgd_val(pgd) & _PAGE_PRESENT) 196#define pgd_present(pgd) (pgd_val(pgd) & _PAGE_PRESENT)
197static inline int pgd_large(pgd_t pgd) { return 0; } 197static inline int pgd_large(pgd_t pgd) { return 0; }
198#define mk_kernel_pgd(address) ((pgd_t){ (address) | _KERNPG_TABLE }) 198#define mk_kernel_pgd(address) __pgd((address) | _KERNPG_TABLE)
199 199
200/* PUD - Level3 access */ 200/* PUD - Level3 access */
201/* to find an entry in a page-table-directory. */ 201/* to find an entry in a page-table-directory. */
diff --git a/include/asm-x86/processor-flags.h b/include/asm-x86/processor-flags.h
index 199cab107d85..092b39b3a7e6 100644
--- a/include/asm-x86/processor-flags.h
+++ b/include/asm-x86/processor-flags.h
@@ -88,4 +88,10 @@
88#define CX86_ARR_BASE 0xc4 88#define CX86_ARR_BASE 0xc4
89#define CX86_RCR_BASE 0xdc 89#define CX86_RCR_BASE 0xdc
90 90
91#ifdef CONFIG_VM86
92#define X86_VM_MASK X86_EFLAGS_VM
93#else
94#define X86_VM_MASK 0 /* No VM86 support */
95#endif
96
91#endif /* __ASM_I386_PROCESSOR_FLAGS_H */ 97#endif /* __ASM_I386_PROCESSOR_FLAGS_H */
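[Editor's note] Moving X86_VM_MASK here, keyed on CONFIG_VM86, means EFLAGS tests compile away cleanly when vm86 support is off. A hedged sketch of the usage pattern (the function name is illustrative, not from this patch):

/* Illustrative only: how a fault path might test for vm86 mode.
 * With CONFIG_VM86 unset, X86_VM_MASK is 0, so the test is constant
 * false and the compiler can drop the branch entirely.
 */
static inline int fault_in_vm86_mode(unsigned long eflags)
{
	return (eflags & X86_VM_MASK) != 0;
}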
diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h
index 559105220a47..7f7382704592 100644
--- a/include/asm-x86/processor.h
+++ b/include/asm-x86/processor.h
@@ -153,7 +153,7 @@ static inline int hlt_works(int cpu)
153 153
154extern void cpu_detect(struct cpuinfo_x86 *c); 154extern void cpu_detect(struct cpuinfo_x86 *c);
155 155
156extern void identify_cpu(struct cpuinfo_x86 *); 156extern void early_cpu_init(void);
157extern void identify_boot_cpu(void); 157extern void identify_boot_cpu(void);
158extern void identify_secondary_cpu(struct cpuinfo_x86 *); 158extern void identify_secondary_cpu(struct cpuinfo_x86 *);
159extern void print_cpu_info(struct cpuinfo_x86 *); 159extern void print_cpu_info(struct cpuinfo_x86 *);
@@ -263,15 +263,11 @@ struct tss_struct {
263 struct thread_struct *io_bitmap_owner; 263 struct thread_struct *io_bitmap_owner;
264 264
265 /* 265 /*
266 * Pad the TSS to be cacheline-aligned (size is 0x100):
267 */
268 unsigned long __cacheline_filler[35];
269 /*
270 * .. and then another 0x100 bytes for the emergency kernel stack: 266 * .. and then another 0x100 bytes for the emergency kernel stack:
271 */ 267 */
272 unsigned long stack[64]; 268 unsigned long stack[64];
273 269
274} __attribute__((packed)); 270} ____cacheline_aligned;
275 271
276DECLARE_PER_CPU(struct tss_struct, init_tss); 272DECLARE_PER_CPU(struct tss_struct, init_tss);
277 273
@@ -535,7 +531,6 @@ static inline void load_sp0(struct tss_struct *tss,
535} 531}
536 532
537#define set_iopl_mask native_set_iopl_mask 533#define set_iopl_mask native_set_iopl_mask
538#define SWAPGS swapgs
539#endif /* CONFIG_PARAVIRT */ 534#endif /* CONFIG_PARAVIRT */
540 535
541/* 536/*
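[Editor's note] Replacing the hand-counted __cacheline_filler[35] pad (plus packed) with ____cacheline_aligned lets the compiler do the padding. A minimal sketch of the idiom, with illustrative struct contents:

#include <linux/cache.h>	/* ____cacheline_aligned */

/* Illustrative: the attribute aligns the object to the cacheline size
 * and, as a consequence, rounds sizeof() up to a multiple of it -- no
 * manual filler array to keep in sync with the struct layout.
 */
struct example_percpu_state {
	unsigned long stack[64];
} ____cacheline_aligned;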
diff --git a/include/asm-x86/proto.h b/include/asm-x86/proto.h
index 6c8b41b03f6d..3dd458c385c0 100644
--- a/include/asm-x86/proto.h
+++ b/include/asm-x86/proto.h
@@ -14,8 +14,6 @@ extern void ia32_syscall(void);
14extern void ia32_cstar_target(void); 14extern void ia32_cstar_target(void);
15extern void ia32_sysenter_target(void); 15extern void ia32_sysenter_target(void);
16 16
17extern void reserve_bootmem_generic(unsigned long phys, unsigned len);
18
19extern void syscall32_cpu_init(void); 17extern void syscall32_cpu_init(void);
20 18
21extern void check_efer(void); 19extern void check_efer(void);
diff --git a/include/asm-x86/ptrace.h b/include/asm-x86/ptrace.h
index 9f922b0b95d6..8a71db803da6 100644
--- a/include/asm-x86/ptrace.h
+++ b/include/asm-x86/ptrace.h
@@ -3,7 +3,12 @@
3 3
4#include <linux/compiler.h> /* For __user */ 4#include <linux/compiler.h> /* For __user */
5#include <asm/ptrace-abi.h> 5#include <asm/ptrace-abi.h>
6#include <asm/processor-flags.h>
6 7
8#ifdef __KERNEL__
9#include <asm/ds.h> /* the DS BTS struct is used for ptrace too */
10#include <asm/segment.h>
11#endif
7 12
8#ifndef __ASSEMBLY__ 13#ifndef __ASSEMBLY__
9 14
@@ -55,9 +60,6 @@ struct pt_regs {
55 unsigned long ss; 60 unsigned long ss;
56}; 61};
57 62
58#include <asm/vm86.h>
59#include <asm/segment.h>
60
61#endif /* __KERNEL__ */ 63#endif /* __KERNEL__ */
62 64
63#else /* __i386__ */ 65#else /* __i386__ */
diff --git a/include/asm-x86/reboot.h b/include/asm-x86/reboot.h
index e63741f19392..206f355786dc 100644
--- a/include/asm-x86/reboot.h
+++ b/include/asm-x86/reboot.h
@@ -14,8 +14,8 @@ struct machine_ops {
14 14
15extern struct machine_ops machine_ops; 15extern struct machine_ops machine_ops;
16 16
17void machine_real_restart(unsigned char *code, int length);
18void native_machine_crash_shutdown(struct pt_regs *regs); 17void native_machine_crash_shutdown(struct pt_regs *regs);
19void native_machine_shutdown(void); 18void native_machine_shutdown(void);
19void machine_real_restart(const unsigned char *code, int length);
20 20
21#endif /* _ASM_REBOOT_H */ 21#endif /* _ASM_REBOOT_H */
diff --git a/include/asm-x86/required-features.h b/include/asm-x86/required-features.h
index 7400d3ad75c6..adec887dd7cd 100644
--- a/include/asm-x86/required-features.h
+++ b/include/asm-x86/required-features.h
@@ -19,9 +19,13 @@
19 19
20#if defined(CONFIG_X86_PAE) || defined(CONFIG_X86_64) 20#if defined(CONFIG_X86_PAE) || defined(CONFIG_X86_64)
21# define NEED_PAE (1<<(X86_FEATURE_PAE & 31)) 21# define NEED_PAE (1<<(X86_FEATURE_PAE & 31))
22# define NEED_CX8 (1<<(X86_FEATURE_CX8 & 31))
23#else 22#else
24# define NEED_PAE 0 23# define NEED_PAE 0
24#endif
25
26#ifdef CONFIG_X86_CMPXCHG64
27# define NEED_CX8 (1<<(X86_FEATURE_CX8 & 31))
28#else
25# define NEED_CX8 0 29# define NEED_CX8 0
26#endif 30#endif
27 31
@@ -38,7 +42,7 @@
38#endif 42#endif
39 43
40#ifdef CONFIG_X86_64 44#ifdef CONFIG_X86_64
41#define NEED_PSE (1<<(X86_FEATURE_PSE & 31)) 45#define NEED_PSE 0
42#define NEED_MSR (1<<(X86_FEATURE_MSR & 31)) 46#define NEED_MSR (1<<(X86_FEATURE_MSR & 31))
43#define NEED_PGE (1<<(X86_FEATURE_PGE & 31)) 47#define NEED_PGE (1<<(X86_FEATURE_PGE & 31))
44#define NEED_FXSR (1<<(X86_FEATURE_FXSR & 31)) 48#define NEED_FXSR (1<<(X86_FEATURE_FXSR & 31))
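[Editor's note] These NEED_* bits are OR-ed into the REQUIRED_MASK words and tested against CPUID very early, so a kernel built with CONFIG_X86_CMPXCHG64 now refuses to run on a CPU lacking CX8. A hedged sketch of that check (one mask word only; the real boot code walks all of them):

/* Illustrative sketch of the early required-features test. */
static int cpu_has_required_features(u32 cpuid_edx, u32 required_mask0)
{
	u32 missing = required_mask0 & ~cpuid_edx;
	return missing == 0;	/* any missing bit means: do not boot */
}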
diff --git a/include/asm-x86/resume-trace.h b/include/asm-x86/resume-trace.h
index 2557514d7ef6..8d9f0b41ee86 100644
--- a/include/asm-x86/resume-trace.h
+++ b/include/asm-x86/resume-trace.h
@@ -6,7 +6,7 @@
6#define TRACE_RESUME(user) \ 6#define TRACE_RESUME(user) \
7do { \ 7do { \
8 if (pm_trace_enabled) { \ 8 if (pm_trace_enabled) { \
9 void *tracedata; \ 9 const void *tracedata; \
10 asm volatile(_ASM_MOV_UL " $1f,%0\n" \ 10 asm volatile(_ASM_MOV_UL " $1f,%0\n" \
11 ".section .tracedata,\"a\"\n" \ 11 ".section .tracedata,\"a\"\n" \
12 "1:\t.word %c1\n\t" \ 12 "1:\t.word %c1\n\t" \
diff --git a/include/asm-x86/seccomp_32.h b/include/asm-x86/seccomp_32.h
index 18da19e89bff..36e71c5f306f 100644
--- a/include/asm-x86/seccomp_32.h
+++ b/include/asm-x86/seccomp_32.h
@@ -1,4 +1,5 @@
1#ifndef _ASM_SECCOMP_H 1#ifndef _ASM_SECCOMP_H
2#define _ASM_SECCOMP_H
2 3
3#include <linux/thread_info.h> 4#include <linux/thread_info.h>
4 5
diff --git a/include/asm-x86/seccomp_64.h b/include/asm-x86/seccomp_64.h
index 553af65a2287..76cfe69aa63c 100644
--- a/include/asm-x86/seccomp_64.h
+++ b/include/asm-x86/seccomp_64.h
@@ -1,4 +1,5 @@
1#ifndef _ASM_SECCOMP_H 1#ifndef _ASM_SECCOMP_H
2#define _ASM_SECCOMP_H
2 3
3#include <linux/thread_info.h> 4#include <linux/thread_info.h>
4 5
diff --git a/include/asm-x86/segment.h b/include/asm-x86/segment.h
index ed5131dd7d92..dfc8601c0892 100644
--- a/include/asm-x86/segment.h
+++ b/include/asm-x86/segment.h
@@ -61,18 +61,14 @@
61#define GDT_ENTRY_TLS_MAX (GDT_ENTRY_TLS_MIN + GDT_ENTRY_TLS_ENTRIES - 1) 61#define GDT_ENTRY_TLS_MAX (GDT_ENTRY_TLS_MIN + GDT_ENTRY_TLS_ENTRIES - 1)
62 62
63#define GDT_ENTRY_DEFAULT_USER_CS 14 63#define GDT_ENTRY_DEFAULT_USER_CS 14
64#define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS * 8 + 3)
65 64
66#define GDT_ENTRY_DEFAULT_USER_DS 15 65#define GDT_ENTRY_DEFAULT_USER_DS 15
67#define __USER_DS (GDT_ENTRY_DEFAULT_USER_DS * 8 + 3)
68 66
69#define GDT_ENTRY_KERNEL_BASE 12 67#define GDT_ENTRY_KERNEL_BASE 12
70 68
71#define GDT_ENTRY_KERNEL_CS (GDT_ENTRY_KERNEL_BASE + 0) 69#define GDT_ENTRY_KERNEL_CS (GDT_ENTRY_KERNEL_BASE + 0)
72#define __KERNEL_CS (GDT_ENTRY_KERNEL_CS * 8)
73 70
74#define GDT_ENTRY_KERNEL_DS (GDT_ENTRY_KERNEL_BASE + 1) 71#define GDT_ENTRY_KERNEL_DS (GDT_ENTRY_KERNEL_BASE + 1)
75#define __KERNEL_DS (GDT_ENTRY_KERNEL_DS * 8)
76 72
77#define GDT_ENTRY_TSS (GDT_ENTRY_KERNEL_BASE + 4) 73#define GDT_ENTRY_TSS (GDT_ENTRY_KERNEL_BASE + 4)
78#define GDT_ENTRY_LDT (GDT_ENTRY_KERNEL_BASE + 5) 74#define GDT_ENTRY_LDT (GDT_ENTRY_KERNEL_BASE + 5)
@@ -139,10 +135,11 @@
139#else 135#else
140#include <asm/cache.h> 136#include <asm/cache.h>
141 137
142#define __KERNEL_CS 0x10 138#define GDT_ENTRY_KERNEL32_CS 1
143#define __KERNEL_DS 0x18 139#define GDT_ENTRY_KERNEL_CS 2
140#define GDT_ENTRY_KERNEL_DS 3
144 141
145#define __KERNEL32_CS 0x08 142#define __KERNEL32_CS (GDT_ENTRY_KERNEL32_CS * 8)
146 143
147/* 144/*
148 * we cannot use the same code segment descriptor for user and kernel 145 * we cannot use the same code segment descriptor for user and kernel
@@ -150,10 +147,10 @@
150 * The segment offset needs to contain a RPL. Grr. -AK 147 * The segment offset needs to contain a RPL. Grr. -AK
151 * GDT layout to get 64bit syscall right (sysret hardcodes gdt offsets) 148 * GDT layout to get 64bit syscall right (sysret hardcodes gdt offsets)
152 */ 149 */
153 150#define GDT_ENTRY_DEFAULT_USER32_CS 4
154#define __USER32_CS 0x23 /* 4*8+3 */ 151#define GDT_ENTRY_DEFAULT_USER_DS 5
155#define __USER_DS 0x2b /* 5*8+3 */ 152#define GDT_ENTRY_DEFAULT_USER_CS 6
156#define __USER_CS 0x33 /* 6*8+3 */ 153#define __USER32_CS (GDT_ENTRY_DEFAULT_USER32_CS * 8 + 3)
157#define __USER32_DS __USER_DS 154#define __USER32_DS __USER_DS
158 155
159#define GDT_ENTRY_TSS 8 /* needs two entries */ 156#define GDT_ENTRY_TSS 8 /* needs two entries */
@@ -175,6 +172,10 @@
175 172
176#endif 173#endif
177 174
175#define __KERNEL_CS (GDT_ENTRY_KERNEL_CS * 8)
176#define __KERNEL_DS (GDT_ENTRY_KERNEL_DS * 8)
177#define __USER_DS (GDT_ENTRY_DEFAULT_USER_DS* 8 + 3)
178#define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS* 8 + 3)
178#ifndef CONFIG_PARAVIRT 179#ifndef CONFIG_PARAVIRT
179#define get_kernel_rpl() 0 180#define get_kernel_rpl() 0
180#endif 181#endif
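[Editor's note] Moving the selector definitions after both #ifdef branches works because a selector is just the descriptor index times 8 with the requested privilege level in the low two bits. A worked example against the 64-bit GDT layout introduced above:

/* Selector = (GDT index << 3) | RPL.  With the new 64-bit layout:
 *   __KERNEL_CS = 2 * 8     = 0x10  (ring 0)
 *   __KERNEL_DS = 3 * 8     = 0x18
 *   __USER32_CS = 4 * 8 + 3 = 0x23  (ring 3)
 *   __USER_DS   = 5 * 8 + 3 = 0x2b
 *   __USER_CS   = 6 * 8 + 3 = 0x33
 * which matches the hard-coded values this patch deletes.
 */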
diff --git a/include/asm-x86/setup.h b/include/asm-x86/setup.h
index fa6763af8d26..90ab2225e71b 100644
--- a/include/asm-x86/setup.h
+++ b/include/asm-x86/setup.h
@@ -8,7 +8,25 @@
8/* Interrupt control for vSMPowered x86_64 systems */ 8/* Interrupt control for vSMPowered x86_64 systems */
9void vsmp_init(void); 9void vsmp_init(void);
10 10
11char *machine_specific_memory_setup(void); 11#ifdef CONFIG_X86_VISWS
12extern void visws_early_detect(void);
13extern int is_visws_box(void);
14#else
15static inline void visws_early_detect(void) { }
16static inline int is_visws_box(void) { return 0; }
17#endif
18
19/*
20 * Any setup quirks to be performed?
21 */
22extern int (*arch_time_init_quirk)(void);
23extern int (*arch_pre_intr_init_quirk)(void);
24extern int (*arch_intr_init_quirk)(void);
25extern int (*arch_trap_init_quirk)(void);
26extern char * (*arch_memory_setup_quirk)(void);
27extern int (*mach_get_smp_config_quirk)(unsigned int early);
28extern int (*mach_find_smp_config_quirk)(unsigned int reserve);
29
12#ifndef CONFIG_PARAVIRT 30#ifndef CONFIG_PARAVIRT
13#define paravirt_post_allocator_init() do {} while (0) 31#define paravirt_post_allocator_init() do {} while (0)
14#endif 32#endif
@@ -43,26 +61,23 @@ char *machine_specific_memory_setup(void);
43 */ 61 */
44extern struct boot_params boot_params; 62extern struct boot_params boot_params;
45 63
46#ifdef __i386__
47/* 64/*
48 * Do NOT EVER look at the BIOS memory size location. 65 * Do NOT EVER look at the BIOS memory size location.
49 * It does not work on many machines. 66 * It does not work on many machines.
50 */ 67 */
51#define LOWMEMSIZE() (0x9f000) 68#define LOWMEMSIZE() (0x9f000)
52 69
53struct e820entry; 70#ifdef __i386__
54
55char * __init machine_specific_memory_setup(void);
56char *memory_setup(void);
57 71
58int __init copy_e820_map(struct e820entry *biosmap, int nr_map); 72void __init i386_start_kernel(void);
59int __init sanitize_e820_map(struct e820entry *biosmap, char *pnr_map); 73extern void probe_roms(void);
60void __init add_memory_region(unsigned long long start,
61 unsigned long long size, int type);
62 74
75extern unsigned long init_pg_tables_start;
63extern unsigned long init_pg_tables_end; 76extern unsigned long init_pg_tables_end;
64 77
65 78#else
79void __init x86_64_start_kernel(char *real_mode);
80void __init x86_64_start_reservations(char *real_mode_data);
66 81
67#endif /* __i386__ */ 82#endif /* __i386__ */
68#endif /* _SETUP */ 83#endif /* _SETUP */
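[Editor's note] The new *_quirk function pointers let a sub-architecture override individual setup steps without an #ifdef maze. A hedged sketch of how a platform might install one (everything beyond the pointer itself is illustrative):

/* Illustrative: a platform's early-detect code hooking memory setup. */
static char * __init my_platform_memory_setup(void)
{
	/* ...build a custom memory map, then identify ourselves... */
	return "my-platform";
}

void __init my_platform_early_detect(void)
{
	arch_memory_setup_quirk = my_platform_memory_setup;
}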
diff --git a/include/asm-x86/smp.h b/include/asm-x86/smp.h
index 1ebaa5cd3112..2e221f1ce0b2 100644
--- a/include/asm-x86/smp.h
+++ b/include/asm-x86/smp.h
@@ -29,21 +29,12 @@ extern int smp_num_siblings;
29extern unsigned int num_processors; 29extern unsigned int num_processors;
30extern cpumask_t cpu_initialized; 30extern cpumask_t cpu_initialized;
31 31
32#ifdef CONFIG_SMP
33extern u16 x86_cpu_to_apicid_init[];
34extern u16 x86_bios_cpu_apicid_init[];
35extern void *x86_cpu_to_apicid_early_ptr;
36extern void *x86_bios_cpu_apicid_early_ptr;
37#else
38#define x86_cpu_to_apicid_early_ptr NULL
39#define x86_bios_cpu_apicid_early_ptr NULL
40#endif
41
42DECLARE_PER_CPU(cpumask_t, cpu_sibling_map); 32DECLARE_PER_CPU(cpumask_t, cpu_sibling_map);
43DECLARE_PER_CPU(cpumask_t, cpu_core_map); 33DECLARE_PER_CPU(cpumask_t, cpu_core_map);
44DECLARE_PER_CPU(u16, cpu_llc_id); 34DECLARE_PER_CPU(u16, cpu_llc_id);
45DECLARE_PER_CPU(u16, x86_cpu_to_apicid); 35
46DECLARE_PER_CPU(u16, x86_bios_cpu_apicid); 36DECLARE_EARLY_PER_CPU(u16, x86_cpu_to_apicid);
37DECLARE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid);
47 38
48/* Static state in head.S used to set up a CPU */ 39/* Static state in head.S used to set up a CPU */
49extern struct { 40extern struct {
@@ -118,8 +109,6 @@ int native_cpu_up(unsigned int cpunum);
118extern int __cpu_disable(void); 109extern int __cpu_disable(void);
119extern void __cpu_die(unsigned int cpu); 110extern void __cpu_die(unsigned int cpu);
120 111
121extern void prefill_possible_map(void);
122
123void smp_store_cpu_info(int id); 112void smp_store_cpu_info(int id);
124#define cpu_physical_id(cpu) per_cpu(x86_cpu_to_apicid, cpu) 113#define cpu_physical_id(cpu) per_cpu(x86_cpu_to_apicid, cpu)
125 114
@@ -130,6 +119,14 @@ static inline int num_booting_cpus(void)
130} 119}
131#endif /* CONFIG_SMP */ 120#endif /* CONFIG_SMP */
132 121
122#if defined(CONFIG_SMP) && defined(CONFIG_HOTPLUG_CPU)
123extern void prefill_possible_map(void);
124#else
125static inline void prefill_possible_map(void)
126{
127}
128#endif
129
133extern unsigned disabled_cpus __cpuinitdata; 130extern unsigned disabled_cpus __cpuinitdata;
134 131
135#ifdef CONFIG_X86_32_SMP 132#ifdef CONFIG_X86_32_SMP
@@ -197,11 +194,9 @@ static inline int hard_smp_processor_id(void)
197#endif /* CONFIG_X86_LOCAL_APIC */ 194#endif /* CONFIG_X86_LOCAL_APIC */
198 195
199#ifdef CONFIG_HOTPLUG_CPU 196#ifdef CONFIG_HOTPLUG_CPU
200extern void cpu_exit_clear(void);
201extern void cpu_uninit(void); 197extern void cpu_uninit(void);
202#endif 198#endif
203 199
204extern void smp_alloc_memory(void);
205extern void lock_ipi_call_lock(void); 200extern void lock_ipi_call_lock(void);
206extern void unlock_ipi_call_lock(void); 201extern void unlock_ipi_call_lock(void);
207#endif /* __ASSEMBLY__ */ 202#endif /* __ASSEMBLY__ */
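[Editor's note] DECLARE_EARLY_PER_CPU replaces the hand-rolled x86_*_init[] arrays and *_early_ptr globals: the map lives in an init-time array until the per-cpu areas exist, then moves into per-cpu storage. A hedged sketch of the access pattern (mirrors early_cpu_to_node() in the topology.h hunk below; macro internals assumed, not quoted from this patch):

/* Illustrative: reading an early-per-cpu map safely at any boot stage. */
u16 cpu_apicid(int cpu)
{
	if (early_per_cpu_ptr(x86_cpu_to_apicid))	/* before setup_per_cpu_areas() */
		return early_per_cpu_ptr(x86_cpu_to_apicid)[cpu];
	return per_cpu(x86_cpu_to_apicid, cpu);		/* after */
}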
diff --git a/include/asm-x86/srat.h b/include/asm-x86/srat.h
index f4bba131d068..774c919dc232 100644
--- a/include/asm-x86/srat.h
+++ b/include/asm-x86/srat.h
@@ -27,11 +27,13 @@
27#ifndef _ASM_SRAT_H_ 27#ifndef _ASM_SRAT_H_
28#define _ASM_SRAT_H_ 28#define _ASM_SRAT_H_
29 29
30#ifndef CONFIG_ACPI_SRAT 30#ifdef CONFIG_ACPI_NUMA
31#error CONFIG_ACPI_SRAT not defined, and srat.h header has been included
32#endif
33
34extern int get_memcfg_from_srat(void); 31extern int get_memcfg_from_srat(void);
35extern unsigned long *get_zholes_size(int); 32#else
33static inline int get_memcfg_from_srat(void)
34{
35 return 0;
36}
37#endif
36 38
37#endif /* _ASM_SRAT_H_ */ 39#endif /* _ASM_SRAT_H_ */
diff --git a/include/asm-x86/string_32.h b/include/asm-x86/string_32.h
index b49369ad9a61..193578cd1fd9 100644
--- a/include/asm-x86/string_32.h
+++ b/include/asm-x86/string_32.h
@@ -29,81 +29,116 @@ extern char *strchr(const char *s, int c);
29#define __HAVE_ARCH_STRLEN 29#define __HAVE_ARCH_STRLEN
30extern size_t strlen(const char *s); 30extern size_t strlen(const char *s);
31 31
32static __always_inline void * __memcpy(void * to, const void * from, size_t n) 32static __always_inline void *__memcpy(void *to, const void *from, size_t n)
33{ 33{
34int d0, d1, d2; 34 int d0, d1, d2;
35__asm__ __volatile__( 35 asm volatile("rep ; movsl\n\t"
36 "rep ; movsl\n\t" 36 "movl %4,%%ecx\n\t"
37 "movl %4,%%ecx\n\t" 37 "andl $3,%%ecx\n\t"
38 "andl $3,%%ecx\n\t" 38 "jz 1f\n\t"
39 "jz 1f\n\t" 39 "rep ; movsb\n\t"
40 "rep ; movsb\n\t" 40 "1:"
41 "1:" 41 : "=&c" (d0), "=&D" (d1), "=&S" (d2)
42 : "=&c" (d0), "=&D" (d1), "=&S" (d2) 42 : "0" (n / 4), "g" (n), "1" ((long)to), "2" ((long)from)
43 : "0" (n/4), "g" (n), "1" ((long) to), "2" ((long) from) 43 : "memory");
44 : "memory"); 44 return to;
45return (to);
46} 45}
47 46
48/* 47/*
49 * This looks ugly, but the compiler can optimize it totally, 48 * This looks ugly, but the compiler can optimize it totally,
50 * as the count is constant. 49 * as the count is constant.
51 */ 50 */
52static __always_inline void * __constant_memcpy(void * to, const void * from, size_t n) 51static __always_inline void *__constant_memcpy(void *to, const void *from,
52 size_t n)
53{ 53{
54 long esi, edi; 54 long esi, edi;
55 if (!n) return to; 55 if (!n)
56#if 1 /* want to do small copies with non-string ops? */ 56 return to;
57
57 switch (n) { 58 switch (n) {
58 case 1: *(char*)to = *(char*)from; return to; 59 case 1:
59 case 2: *(short*)to = *(short*)from; return to; 60 *(char *)to = *(char *)from;
60 case 4: *(int*)to = *(int*)from; return to; 61 return to;
61#if 1 /* including those doable with two moves? */ 62 case 2:
62 case 3: *(short*)to = *(short*)from; 63 *(short *)to = *(short *)from;
63 *((char*)to+2) = *((char*)from+2); return to; 64 return to;
64 case 5: *(int*)to = *(int*)from; 65 case 4:
65 *((char*)to+4) = *((char*)from+4); return to; 66 *(int *)to = *(int *)from;
66 case 6: *(int*)to = *(int*)from; 67 return to;
67 *((short*)to+2) = *((short*)from+2); return to; 68
68 case 8: *(int*)to = *(int*)from; 69 case 3:
69 *((int*)to+1) = *((int*)from+1); return to; 70 *(short *)to = *(short *)from;
70#endif 71 *((char *)to + 2) = *((char *)from + 2);
72 return to;
73 case 5:
74 *(int *)to = *(int *)from;
75 *((char *)to + 4) = *((char *)from + 4);
76 return to;
77 case 6:
78 *(int *)to = *(int *)from;
79 *((short *)to + 2) = *((short *)from + 2);
80 return to;
81 case 8:
82 *(int *)to = *(int *)from;
83 *((int *)to + 1) = *((int *)from + 1);
84 return to;
71 } 85 }
72#endif 86
73 esi = (long) from; 87 esi = (long)from;
74 edi = (long) to; 88 edi = (long)to;
75 if (n >= 5*4) { 89 if (n >= 5 * 4) {
76 /* large block: use rep prefix */ 90 /* large block: use rep prefix */
77 int ecx; 91 int ecx;
78 __asm__ __volatile__( 92 asm volatile("rep ; movsl"
79 "rep ; movsl" 93 : "=&c" (ecx), "=&D" (edi), "=&S" (esi)
80 : "=&c" (ecx), "=&D" (edi), "=&S" (esi) 94 : "0" (n / 4), "1" (edi), "2" (esi)
81 : "0" (n/4), "1" (edi),"2" (esi) 95 : "memory"
82 : "memory"
83 ); 96 );
84 } else { 97 } else {
85 /* small block: don't clobber ecx + smaller code */ 98 /* small block: don't clobber ecx + smaller code */
86 if (n >= 4*4) __asm__ __volatile__("movsl" 99 if (n >= 4 * 4)
87 :"=&D"(edi),"=&S"(esi):"0"(edi),"1"(esi):"memory"); 100 asm volatile("movsl"
88 if (n >= 3*4) __asm__ __volatile__("movsl" 101 : "=&D"(edi), "=&S"(esi)
89 :"=&D"(edi),"=&S"(esi):"0"(edi),"1"(esi):"memory"); 102 : "0"(edi), "1"(esi)
90 if (n >= 2*4) __asm__ __volatile__("movsl" 103 : "memory");
91 :"=&D"(edi),"=&S"(esi):"0"(edi),"1"(esi):"memory"); 104 if (n >= 3 * 4)
92 if (n >= 1*4) __asm__ __volatile__("movsl" 105 asm volatile("movsl"
93 :"=&D"(edi),"=&S"(esi):"0"(edi),"1"(esi):"memory"); 106 : "=&D"(edi), "=&S"(esi)
107 : "0"(edi), "1"(esi)
108 : "memory");
109 if (n >= 2 * 4)
110 asm volatile("movsl"
111 : "=&D"(edi), "=&S"(esi)
112 : "0"(edi), "1"(esi)
113 : "memory");
114 if (n >= 1 * 4)
115 asm volatile("movsl"
116 : "=&D"(edi), "=&S"(esi)
117 : "0"(edi), "1"(esi)
118 : "memory");
94 } 119 }
95 switch (n % 4) { 120 switch (n % 4) {
96 /* tail */ 121 /* tail */
97 case 0: return to; 122 case 0:
98 case 1: __asm__ __volatile__("movsb" 123 return to;
99 :"=&D"(edi),"=&S"(esi):"0"(edi),"1"(esi):"memory"); 124 case 1:
100 return to; 125 asm volatile("movsb"
101 case 2: __asm__ __volatile__("movsw" 126 : "=&D"(edi), "=&S"(esi)
102 :"=&D"(edi),"=&S"(esi):"0"(edi),"1"(esi):"memory"); 127 : "0"(edi), "1"(esi)
103 return to; 128 : "memory");
104 default: __asm__ __volatile__("movsw\n\tmovsb" 129 return to;
105 :"=&D"(edi),"=&S"(esi):"0"(edi),"1"(esi):"memory"); 130 case 2:
106 return to; 131 asm volatile("movsw"
132 : "=&D"(edi), "=&S"(esi)
133 : "0"(edi), "1"(esi)
134 : "memory");
135 return to;
136 default:
137 asm volatile("movsw\n\tmovsb"
138 : "=&D"(edi), "=&S"(esi)
139 : "0"(edi), "1"(esi)
140 : "memory");
141 return to;
107 } 142 }
108} 143}
109 144
@@ -117,87 +152,86 @@ static __always_inline void * __constant_memcpy(void * to, const void * from, si
117 * This CPU favours 3DNow strongly (eg AMD Athlon) 152 * This CPU favours 3DNow strongly (eg AMD Athlon)
118 */ 153 */
119 154
120static inline void * __constant_memcpy3d(void * to, const void * from, size_t len) 155static inline void *__constant_memcpy3d(void *to, const void *from, size_t len)
121{ 156{
122 if (len < 512) 157 if (len < 512)
123 return __constant_memcpy(to, from, len); 158 return __constant_memcpy(to, from, len);
124 return _mmx_memcpy(to, from, len); 159 return _mmx_memcpy(to, from, len);
125} 160}
126 161
127static __inline__ void *__memcpy3d(void *to, const void *from, size_t len) 162static inline void *__memcpy3d(void *to, const void *from, size_t len)
128{ 163{
129 if (len < 512) 164 if (len < 512)
130 return __memcpy(to, from, len); 165 return __memcpy(to, from, len);
131 return _mmx_memcpy(to, from, len); 166 return _mmx_memcpy(to, from, len);
132} 167}
133 168
134#define memcpy(t, f, n) \ 169#define memcpy(t, f, n) \
135(__builtin_constant_p(n) ? \ 170 (__builtin_constant_p((n)) \
136 __constant_memcpy3d((t),(f),(n)) : \ 171 ? __constant_memcpy3d((t), (f), (n)) \
137 __memcpy3d((t),(f),(n))) 172 : __memcpy3d((t), (f), (n)))
138 173
139#else 174#else
140 175
141/* 176/*
142 * No 3D Now! 177 * No 3D Now!
143 */ 178 */
144 179
145#define memcpy(t, f, n) \ 180#define memcpy(t, f, n) \
146(__builtin_constant_p(n) ? \ 181 (__builtin_constant_p((n)) \
147 __constant_memcpy((t),(f),(n)) : \ 182 ? __constant_memcpy((t), (f), (n)) \
148 __memcpy((t),(f),(n))) 183 : __memcpy((t), (f), (n)))
149 184
150#endif 185#endif
151 186
152#define __HAVE_ARCH_MEMMOVE 187#define __HAVE_ARCH_MEMMOVE
153void *memmove(void * dest,const void * src, size_t n); 188void *memmove(void *dest, const void *src, size_t n);
154 189
155#define memcmp __builtin_memcmp 190#define memcmp __builtin_memcmp
156 191
157#define __HAVE_ARCH_MEMCHR 192#define __HAVE_ARCH_MEMCHR
158extern void *memchr(const void * cs,int c,size_t count); 193extern void *memchr(const void *cs, int c, size_t count);
159 194
160static inline void * __memset_generic(void * s, char c,size_t count) 195static inline void *__memset_generic(void *s, char c, size_t count)
161{ 196{
162int d0, d1; 197 int d0, d1;
163__asm__ __volatile__( 198 asm volatile("rep\n\t"
164 "rep\n\t" 199 "stosb"
165 "stosb" 200 : "=&c" (d0), "=&D" (d1)
166 : "=&c" (d0), "=&D" (d1) 201 : "a" (c), "1" (s), "0" (count)
167 :"a" (c),"1" (s),"0" (count) 202 : "memory");
168 :"memory"); 203 return s;
169return s;
170} 204}
171 205
172/* we might want to write optimized versions of these later */ 206/* we might want to write optimized versions of these later */
173#define __constant_count_memset(s,c,count) __memset_generic((s),(c),(count)) 207#define __constant_count_memset(s, c, count) __memset_generic((s), (c), (count))
174 208
175/* 209/*
176 * memset(x,0,y) is a reasonably common thing to do, so we want to fill 210 * memset(x, 0, y) is a reasonably common thing to do, so we want to fill
177 * things 32 bits at a time even when we don't know the size of the 211 * things 32 bits at a time even when we don't know the size of the
178 * area at compile-time.. 212 * area at compile-time..
179 */ 213 */
180static __always_inline void * __constant_c_memset(void * s, unsigned long c, size_t count) 214static __always_inline
215void *__constant_c_memset(void *s, unsigned long c, size_t count)
181{ 216{
182int d0, d1; 217 int d0, d1;
183__asm__ __volatile__( 218 asm volatile("rep ; stosl\n\t"
184 "rep ; stosl\n\t" 219 "testb $2,%b3\n\t"
185 "testb $2,%b3\n\t" 220 "je 1f\n\t"
186 "je 1f\n\t" 221 "stosw\n"
187 "stosw\n" 222 "1:\ttestb $1,%b3\n\t"
188 "1:\ttestb $1,%b3\n\t" 223 "je 2f\n\t"
189 "je 2f\n\t" 224 "stosb\n"
190 "stosb\n" 225 "2:"
191 "2:" 226 : "=&c" (d0), "=&D" (d1)
192 :"=&c" (d0), "=&D" (d1) 227 : "a" (c), "q" (count), "0" (count/4), "1" ((long)s)
193 :"a" (c), "q" (count), "0" (count/4), "1" ((long) s) 228 : "memory");
194 :"memory"); 229 return s;
195return (s);
196} 230}
197 231
198/* Added by Gertjan van Wingerde to make minix and sysv module work */ 232/* Added by Gertjan van Wingerde to make minix and sysv module work */
199#define __HAVE_ARCH_STRNLEN 233#define __HAVE_ARCH_STRNLEN
200extern size_t strnlen(const char * s, size_t count); 234extern size_t strnlen(const char *s, size_t count);
201/* end of additional stuff */ 235/* end of additional stuff */
202 236
203#define __HAVE_ARCH_STRSTR 237#define __HAVE_ARCH_STRSTR
@@ -207,66 +241,85 @@ extern char *strstr(const char *cs, const char *ct);
207 * This looks horribly ugly, but the compiler can optimize it totally, 241 * This looks horribly ugly, but the compiler can optimize it totally,
208 * as we by now know that both pattern and count is constant.. 242 * as we by now know that both pattern and count is constant..
209 */ 243 */
210static __always_inline void * __constant_c_and_count_memset(void * s, unsigned long pattern, size_t count) 244static __always_inline
245void *__constant_c_and_count_memset(void *s, unsigned long pattern,
246 size_t count)
211{ 247{
212 switch (count) { 248 switch (count) {
249 case 0:
250 return s;
251 case 1:
252 *(unsigned char *)s = pattern & 0xff;
253 return s;
254 case 2:
255 *(unsigned short *)s = pattern & 0xffff;
256 return s;
257 case 3:
258 *(unsigned short *)s = pattern & 0xffff;
259 *((unsigned char *)s + 2) = pattern & 0xff;
260 return s;
261 case 4:
262 *(unsigned long *)s = pattern;
263 return s;
264 }
265
266#define COMMON(x) \
267 asm volatile("rep ; stosl" \
268 x \
269 : "=&c" (d0), "=&D" (d1) \
270 : "a" (eax), "0" (count/4), "1" ((long)s) \
271 : "memory")
272
273 {
274 int d0, d1;
275#if __GNUC__ == 4 && __GNUC_MINOR__ == 0
276 /* Workaround for broken gcc 4.0 */
277 register unsigned long eax asm("%eax") = pattern;
278#else
279 unsigned long eax = pattern;
280#endif
281
282 switch (count % 4) {
213 case 0: 283 case 0:
284 COMMON("");
214 return s; 285 return s;
215 case 1: 286 case 1:
216 *(unsigned char *)s = pattern & 0xff; 287 COMMON("\n\tstosb");
217 return s; 288 return s;
218 case 2: 289 case 2:
219 *(unsigned short *)s = pattern & 0xffff; 290 COMMON("\n\tstosw");
220 return s; 291 return s;
221 case 3: 292 default:
222 *(unsigned short *)s = pattern & 0xffff; 293 COMMON("\n\tstosw\n\tstosb");
223 *(2+(unsigned char *)s) = pattern & 0xff;
224 return s;
225 case 4:
226 *(unsigned long *)s = pattern;
227 return s; 294 return s;
295 }
228 } 296 }
229#define COMMON(x) \ 297
230__asm__ __volatile__( \
231 "rep ; stosl" \
232 x \
233 : "=&c" (d0), "=&D" (d1) \
234 : "a" (pattern),"0" (count/4),"1" ((long) s) \
235 : "memory")
236{
237 int d0, d1;
238 switch (count % 4) {
239 case 0: COMMON(""); return s;
240 case 1: COMMON("\n\tstosb"); return s;
241 case 2: COMMON("\n\tstosw"); return s;
242 default: COMMON("\n\tstosw\n\tstosb"); return s;
243 }
244}
245
246#undef COMMON 298#undef COMMON
247} 299}
248 300
249#define __constant_c_x_memset(s, c, count) \ 301#define __constant_c_x_memset(s, c, count) \
250(__builtin_constant_p(count) ? \ 302 (__builtin_constant_p(count) \
251 __constant_c_and_count_memset((s),(c),(count)) : \ 303 ? __constant_c_and_count_memset((s), (c), (count)) \
252 __constant_c_memset((s),(c),(count))) 304 : __constant_c_memset((s), (c), (count)))
253 305
254#define __memset(s, c, count) \ 306#define __memset(s, c, count) \
255(__builtin_constant_p(count) ? \ 307 (__builtin_constant_p(count) \
256 __constant_count_memset((s),(c),(count)) : \ 308 ? __constant_count_memset((s), (c), (count)) \
257 __memset_generic((s),(c),(count))) 309 : __memset_generic((s), (c), (count)))
258 310
259#define __HAVE_ARCH_MEMSET 311#define __HAVE_ARCH_MEMSET
260#define memset(s, c, count) \ 312#define memset(s, c, count) \
261(__builtin_constant_p(c) ? \ 313 (__builtin_constant_p(c) \
262 __constant_c_x_memset((s),(0x01010101UL*(unsigned char)(c)),(count)) : \ 314 ? __constant_c_x_memset((s), (0x01010101UL * (unsigned char)(c)), \
263 __memset((s),(c),(count))) 315 (count)) \
316 : __memset((s), (c), (count)))
264 317
265/* 318/*
266 * find the first occurrence of byte 'c', or 1 past the area if none 319 * find the first occurrence of byte 'c', or 1 past the area if none
267 */ 320 */
268#define __HAVE_ARCH_MEMSCAN 321#define __HAVE_ARCH_MEMSCAN
269extern void *memscan(void * addr, int c, size_t size); 322extern void *memscan(void *addr, int c, size_t size);
270 323
271#endif /* __KERNEL__ */ 324#endif /* __KERNEL__ */
272 325
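[Editor's note] The point of __constant_memcpy is that for a compile-time-constant n the whole switch folds down to one or two moves. A small usage sketch:

/* Illustrative: with n known at compile time, memcpy(dst, src, 6)
 * expands via __constant_memcpy to exactly
 *     *(int *)to = *(int *)from;
 *     *((short *)to + 2) = *((short *)from + 2);
 * -- no call, no rep prefix.  A variable n falls through to the
 * "rep ; movsl" path in __memcpy instead.
 */
struct pkt_hdr { unsigned short type, len, csum; };	/* 6 bytes */

static void copy_hdr(struct pkt_hdr *dst, const struct pkt_hdr *src)
{
	memcpy(dst, src, sizeof(*dst));	/* sizeof() is constant: folds */
}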
diff --git a/include/asm-x86/suspend_32.h b/include/asm-x86/suspend_32.h
index 24e1c080aa8a..8675c6782a7d 100644
--- a/include/asm-x86/suspend_32.h
+++ b/include/asm-x86/suspend_32.h
@@ -3,6 +3,9 @@
3 * Based on code 3 * Based on code
4 * Copyright 2001 Patrick Mochel <mochel@osdl.org> 4 * Copyright 2001 Patrick Mochel <mochel@osdl.org>
5 */ 5 */
6#ifndef __ASM_X86_32_SUSPEND_H
7#define __ASM_X86_32_SUSPEND_H
8
6#include <asm/desc.h> 9#include <asm/desc.h>
7#include <asm/i387.h> 10#include <asm/i387.h>
8 11
@@ -44,3 +47,5 @@ static inline void acpi_save_register_state(unsigned long return_point)
44/* routines for saving/restoring kernel state */ 47/* routines for saving/restoring kernel state */
45extern int acpi_save_state_mem(void); 48extern int acpi_save_state_mem(void);
46#endif 49#endif
50
51#endif /* __ASM_X86_32_SUSPEND_H */
diff --git a/include/asm-x86/system.h b/include/asm-x86/system.h
index a2f04cd79b29..983ce37c491f 100644
--- a/include/asm-x86/system.h
+++ b/include/asm-x86/system.h
@@ -136,7 +136,7 @@ __asm__ __volatile__ ("movw %%dx,%1\n\t" \
136#define set_base(ldt, base) _set_base(((char *)&(ldt)) , (base)) 136#define set_base(ldt, base) _set_base(((char *)&(ldt)) , (base))
137#define set_limit(ldt, limit) _set_limit(((char *)&(ldt)) , ((limit)-1)) 137#define set_limit(ldt, limit) _set_limit(((char *)&(ldt)) , ((limit)-1))
138 138
139extern void load_gs_index(unsigned); 139extern void native_load_gs_index(unsigned);
140 140
141/* 141/*
142 * Load a segment. Fall back on loading the zero 142 * Load a segment. Fall back on loading the zero
@@ -153,14 +153,14 @@ extern void load_gs_index(unsigned);
153 "jmp 2b\n" \ 153 "jmp 2b\n" \
154 ".previous\n" \ 154 ".previous\n" \
155 _ASM_EXTABLE(1b,3b) \ 155 _ASM_EXTABLE(1b,3b) \
156 : :"r" (value), "r" (0)) 156 : :"r" (value), "r" (0) : "memory")
157 157
158 158
159/* 159/*
160 * Save a segment register away 160 * Save a segment register away
161 */ 161 */
162#define savesegment(seg, value) \ 162#define savesegment(seg, value) \
163 asm volatile("mov %%" #seg ",%0":"=rm" (value)) 163 asm("mov %%" #seg ",%0":"=r" (value) : : "memory")
164 164
165static inline unsigned long get_limit(unsigned long segment) 165static inline unsigned long get_limit(unsigned long segment)
166{ 166{
@@ -282,6 +282,7 @@ static inline void native_wbinvd(void)
282#ifdef CONFIG_X86_64 282#ifdef CONFIG_X86_64
283#define read_cr8() (native_read_cr8()) 283#define read_cr8() (native_read_cr8())
284#define write_cr8(x) (native_write_cr8(x)) 284#define write_cr8(x) (native_write_cr8(x))
285#define load_gs_index native_load_gs_index
285#endif 286#endif
286 287
287/* Clear the 'TS' bit */ 288/* Clear the 'TS' bit */
@@ -289,7 +290,7 @@ static inline void native_wbinvd(void)
289 290
290#endif/* CONFIG_PARAVIRT */ 291#endif/* CONFIG_PARAVIRT */
291 292
292#define stts() write_cr0(8 | read_cr0()) 293#define stts() write_cr0(read_cr0() | X86_CR0_TS)
293 294
294#endif /* __KERNEL__ */ 295#endif /* __KERNEL__ */
295 296
@@ -303,7 +304,6 @@ static inline void clflush(volatile void *__p)
303void disable_hlt(void); 304void disable_hlt(void);
304void enable_hlt(void); 305void enable_hlt(void);
305 306
306extern int es7000_plat;
307void cpu_idle_wait(void); 307void cpu_idle_wait(void);
308 308
309extern unsigned long arch_align_stack(unsigned long sp); 309extern unsigned long arch_align_stack(unsigned long sp);
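[Editor's note] stts() previously hard-coded 8; spelling it as X86_CR0_TS documents that it sets the Task Switched bit. The two forms are identical:

/* X86_CR0_TS is bit 3 of CR0, i.e. (1 << 3) == 8, so
 *     write_cr0(read_cr0() | X86_CR0_TS);
 * is byte-for-byte the old write_cr0(8 | read_cr0()) -- it arms a
 * device-not-available (#NM) trap on the next FPU/SSE instruction,
 * which is how lazy FPU context switching works.
 */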
diff --git a/include/asm-x86/thread_info.h b/include/asm-x86/thread_info.h
index 77244f17993f..895339d2bc0b 100644
--- a/include/asm-x86/thread_info.h
+++ b/include/asm-x86/thread_info.h
@@ -1,9 +1,253 @@
1/* thread_info.h: low-level thread information
2 *
3 * Copyright (C) 2002 David Howells (dhowells@redhat.com)
4 * - Incorporating suggestions made by Linus Torvalds and Dave Miller
5 */
6
1#ifndef _ASM_X86_THREAD_INFO_H 7#ifndef _ASM_X86_THREAD_INFO_H
8#define _ASM_X86_THREAD_INFO_H
9
10#include <linux/compiler.h>
11#include <asm/page.h>
12#include <asm/types.h>
13
14/*
15 * low level task data that entry.S needs immediate access to
16 * - this struct should fit entirely inside of one cache line
17 * - this struct shares the supervisor stack pages
18 */
19#ifndef __ASSEMBLY__
20struct task_struct;
21struct exec_domain;
22#include <asm/processor.h>
23
24struct thread_info {
25 struct task_struct *task; /* main task structure */
26 struct exec_domain *exec_domain; /* execution domain */
27 unsigned long flags; /* low level flags */
28 __u32 status; /* thread synchronous flags */
29 __u32 cpu; /* current CPU */
30 int preempt_count; /* 0 => preemptable,
31 <0 => BUG */
32 mm_segment_t addr_limit;
33 struct restart_block restart_block;
34 void __user *sysenter_return;
35#ifdef CONFIG_X86_32
36 unsigned long previous_esp; /* ESP of the previous stack in
37 case of nested (IRQ) stacks
38 */
39 __u8 supervisor_stack[0];
40#endif
41};
42
43#define INIT_THREAD_INFO(tsk) \
44{ \
45 .task = &tsk, \
46 .exec_domain = &default_exec_domain, \
47 .flags = 0, \
48 .cpu = 0, \
49 .preempt_count = 1, \
50 .addr_limit = KERNEL_DS, \
51 .restart_block = { \
52 .fn = do_no_restart_syscall, \
53 }, \
54}
55
56#define init_thread_info (init_thread_union.thread_info)
57#define init_stack (init_thread_union.stack)
58
59#else /* !__ASSEMBLY__ */
60
61#include <asm/asm-offsets.h>
62
63#endif
64
65/*
66 * thread information flags
67 * - these are process state flags that various assembly files
68 * may need to access
69 * - pending work-to-be-done flags are in LSW
70 * - other flags in MSW
71 * Warning: layout of LSW is hardcoded in entry.S
72 */
73#define TIF_SYSCALL_TRACE 0 /* syscall trace active */
74#define TIF_SIGPENDING 2 /* signal pending */
75#define TIF_NEED_RESCHED 3 /* rescheduling necessary */
76#define TIF_SINGLESTEP 4 /* reenable singlestep on user return*/
77#define TIF_IRET 5 /* force IRET */
2#ifdef CONFIG_X86_32 78#ifdef CONFIG_X86_32
3# include "thread_info_32.h" 79#define TIF_SYSCALL_EMU 6 /* syscall emulation active */
80#endif
81#define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */
82#define TIF_SECCOMP 8 /* secure computing */
83#define TIF_MCE_NOTIFY 10 /* notify userspace of an MCE */
84#define TIF_HRTICK_RESCHED 11 /* reprogram hrtick timer */
85#define TIF_NOTSC 16 /* TSC is not accessible in userland */
86#define TIF_IA32 17 /* 32bit process */
87#define TIF_FORK 18 /* ret_from_fork */
88#define TIF_ABI_PENDING 19
89#define TIF_MEMDIE 20
90#define TIF_DEBUG 21 /* uses debug registers */
91#define TIF_IO_BITMAP 22 /* uses I/O bitmap */
92#define TIF_FREEZE 23 /* is freezing for suspend */
93#define TIF_FORCED_TF 24 /* true if TF in eflags artificially */
94#define TIF_DEBUGCTLMSR 25 /* uses thread_struct.debugctlmsr */
95#define TIF_DS_AREA_MSR 26 /* uses thread_struct.ds_area_msr */
96#define TIF_BTS_TRACE_TS 27 /* record scheduling event timestamps */
97
98#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
99#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
100#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP)
101#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
102#define _TIF_IRET (1 << TIF_IRET)
103#ifdef CONFIG_X86_32
104#define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU)
4#else 105#else
5# include "thread_info_64.h" 106#define _TIF_SYSCALL_EMU 0
6#endif 107#endif
108#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
109#define _TIF_SECCOMP (1 << TIF_SECCOMP)
110#define _TIF_MCE_NOTIFY (1 << TIF_MCE_NOTIFY)
111#define _TIF_HRTICK_RESCHED (1 << TIF_HRTICK_RESCHED)
112#define _TIF_NOTSC (1 << TIF_NOTSC)
113#define _TIF_IA32 (1 << TIF_IA32)
114#define _TIF_FORK (1 << TIF_FORK)
115#define _TIF_ABI_PENDING (1 << TIF_ABI_PENDING)
116#define _TIF_DEBUG (1 << TIF_DEBUG)
117#define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP)
118#define _TIF_FREEZE (1 << TIF_FREEZE)
119#define _TIF_FORCED_TF (1 << TIF_FORCED_TF)
120#define _TIF_DEBUGCTLMSR (1 << TIF_DEBUGCTLMSR)
121#define _TIF_DS_AREA_MSR (1 << TIF_DS_AREA_MSR)
122#define _TIF_BTS_TRACE_TS (1 << TIF_BTS_TRACE_TS)
123
124/* work to do on interrupt/exception return */
125#define _TIF_WORK_MASK \
126 (0x0000FFFF & \
127 ~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP| \
128 _TIF_SECCOMP|_TIF_SYSCALL_EMU))
129
130/* work to do on any return to user space */
131#define _TIF_ALLWORK_MASK (0x0000FFFF & ~_TIF_SECCOMP)
132
133/* Only used for 64 bit */
134#define _TIF_DO_NOTIFY_MASK \
135 (_TIF_SIGPENDING|_TIF_SINGLESTEP|_TIF_MCE_NOTIFY|_TIF_HRTICK_RESCHED)
136
137/* flags to check in __switch_to() */
138#define _TIF_WORK_CTXSW \
139 (_TIF_IO_BITMAP|_TIF_DEBUGCTLMSR|_TIF_DS_AREA_MSR|_TIF_BTS_TRACE_TS| \
140 _TIF_NOTSC)
141
142#define _TIF_WORK_CTXSW_PREV _TIF_WORK_CTXSW
143#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG)
144
145#define PREEMPT_ACTIVE 0x10000000
146
147/* thread information allocation */
148#ifdef CONFIG_DEBUG_STACK_USAGE
149#define THREAD_FLAGS (GFP_KERNEL | __GFP_ZERO)
150#else
151#define THREAD_FLAGS GFP_KERNEL
152#endif
153
154#define alloc_thread_info(tsk) \
155 ((struct thread_info *)__get_free_pages(THREAD_FLAGS, THREAD_ORDER))
156
157#ifdef CONFIG_X86_32
158
159#define STACK_WARN (THREAD_SIZE/8)
160/*
161 * macros/functions for gaining access to the thread information structure
162 *
163 * preempt_count needs to be 1 initially, until the scheduler is functional.
164 */
165#ifndef __ASSEMBLY__
166
167
168/* how to get the current stack pointer from C */
169register unsigned long current_stack_pointer asm("esp") __used;
170
171/* how to get the thread information struct from C */
172static inline struct thread_info *current_thread_info(void)
173{
174 return (struct thread_info *)
175 (current_stack_pointer & ~(THREAD_SIZE - 1));
176}
177
178#else /* !__ASSEMBLY__ */
179
180/* how to get the thread information struct from ASM */
181#define GET_THREAD_INFO(reg) \
182 movl $-THREAD_SIZE, reg; \
183 andl %esp, reg
184
185/* use this one if reg already contains %esp */
186#define GET_THREAD_INFO_WITH_ESP(reg) \
187 andl $-THREAD_SIZE, reg
188
189#endif
190
191#else /* X86_32 */
192
193#include <asm/pda.h>
194
195/*
196 * macros/functions for gaining access to the thread information structure
197 * preempt_count needs to be 1 initially, until the scheduler is functional.
198 */
199#ifndef __ASSEMBLY__
200static inline struct thread_info *current_thread_info(void)
201{
202 struct thread_info *ti;
203 ti = (void *)(read_pda(kernelstack) + PDA_STACKOFFSET - THREAD_SIZE);
204 return ti;
205}
206
207/* do not use in interrupt context */
208static inline struct thread_info *stack_thread_info(void)
209{
210 struct thread_info *ti;
211 asm("andq %%rsp,%0; " : "=r" (ti) : "0" (~(THREAD_SIZE - 1)));
212 return ti;
213}
214
215#else /* !__ASSEMBLY__ */
216
217/* how to get the thread information struct from ASM */
218#define GET_THREAD_INFO(reg) \
219 movq %gs:pda_kernelstack,reg ; \
220 subq $(THREAD_SIZE-PDA_STACKOFFSET),reg
221
222#endif
223
224#endif /* !X86_32 */
225
226/*
227 * Thread-synchronous status.
228 *
229 * This is different from the flags in that nobody else
230 * ever touches our thread-synchronous status, so we don't
231 * have to worry about atomic accesses.
232 */
233#define TS_USEDFPU 0x0001 /* FPU was used by this task
234 this quantum (SMP) */
235#define TS_COMPAT 0x0002 /* 32bit syscall active (64BIT)*/
236#define TS_POLLING 0x0004 /* true if in idle loop
237 and not sleeping */
238#define TS_RESTORE_SIGMASK 0x0008 /* restore signal mask in do_signal() */
239
240#define tsk_is_polling(t) (task_thread_info(t)->status & TS_POLLING)
241
242#ifndef __ASSEMBLY__
243#define HAVE_SET_RESTORE_SIGMASK 1
244static inline void set_restore_sigmask(void)
245{
246 struct thread_info *ti = current_thread_info();
247 ti->status |= TS_RESTORE_SIGMASK;
248 set_bit(TIF_SIGPENDING, (unsigned long *)&ti->flags);
249}
250#endif /* !__ASSEMBLY__ */
7 251
8#ifndef __ASSEMBLY__ 252#ifndef __ASSEMBLY__
9extern void arch_task_cache_init(void); 253extern void arch_task_cache_init(void);
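[Editor's note] Both halves of the merged header locate thread_info the same way: it sits at the base of the THREAD_SIZE-aligned kernel stack, so on 32-bit masking the stack pointer is enough. A worked example:

/* Illustrative: with THREAD_SIZE = 8192 (0x2000) and %esp = 0xc1234f80,
 *     0xc1234f80 & ~0x1fff = 0xc1234000
 * which is the bottom of the current stack and therefore the address of
 * its struct thread_info.  The 64-bit side cannot mask %rsp this way
 * from interrupt context (separate IRQ stacks), hence its PDA-based
 * current_thread_info() above.
 */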
diff --git a/include/asm-x86/thread_info_32.h b/include/asm-x86/thread_info_32.h
deleted file mode 100644
index b6338829d1a8..000000000000
--- a/include/asm-x86/thread_info_32.h
+++ /dev/null
@@ -1,205 +0,0 @@
1/* thread_info.h: i386 low-level thread information
2 *
3 * Copyright (C) 2002 David Howells (dhowells@redhat.com)
4 * - Incorporating suggestions made by Linus Torvalds and Dave Miller
5 */
6
7#ifndef _ASM_THREAD_INFO_H
8#define _ASM_THREAD_INFO_H
9
10#ifdef __KERNEL__
11
12#include <linux/compiler.h>
13#include <asm/page.h>
14
15#ifndef __ASSEMBLY__
16#include <asm/processor.h>
17#endif
18
19/*
20 * low level task data that entry.S needs immediate access to
21 * - this struct should fit entirely inside of one cache line
22 * - this struct shares the supervisor stack pages
23 * - if the contents of this structure are changed,
24 * the assembly constants must also be changed
25 */
26#ifndef __ASSEMBLY__
27
28struct thread_info {
29 struct task_struct *task; /* main task structure */
30 struct exec_domain *exec_domain; /* execution domain */
31 unsigned long flags; /* low level flags */
32 unsigned long status; /* thread-synchronous flags */
33 __u32 cpu; /* current CPU */
34 int preempt_count; /* 0 => preemptable,
35 <0 => BUG */
36 mm_segment_t addr_limit; /* thread address space:
37 0-0xBFFFFFFF user-thread
38 0-0xFFFFFFFF kernel-thread
39 */
40 void *sysenter_return;
41 struct restart_block restart_block;
42 unsigned long previous_esp; /* ESP of the previous stack in
43 case of nested (IRQ) stacks
44 */
45 __u8 supervisor_stack[0];
46};
47
48#else /* !__ASSEMBLY__ */
49
50#include <asm/asm-offsets.h>
51
52#endif
53
54#define PREEMPT_ACTIVE 0x10000000
55#ifdef CONFIG_4KSTACKS
56#define THREAD_SIZE (4096)
57#else
58#define THREAD_SIZE (8192)
59#endif
60
61#define STACK_WARN (THREAD_SIZE/8)
62/*
63 * macros/functions for gaining access to the thread information structure
64 *
65 * preempt_count needs to be 1 initially, until the scheduler is functional.
66 */
67#ifndef __ASSEMBLY__
68
69#define INIT_THREAD_INFO(tsk) \
70{ \
71 .task = &tsk, \
72 .exec_domain = &default_exec_domain, \
73 .flags = 0, \
74 .cpu = 0, \
75 .preempt_count = 1, \
76 .addr_limit = KERNEL_DS, \
77 .restart_block = { \
78 .fn = do_no_restart_syscall, \
79 }, \
80}
81
82#define init_thread_info (init_thread_union.thread_info)
83#define init_stack (init_thread_union.stack)
84
85
86/* how to get the current stack pointer from C */
87register unsigned long current_stack_pointer asm("esp") __used;
88
89/* how to get the thread information struct from C */
90static inline struct thread_info *current_thread_info(void)
91{
92 return (struct thread_info *)
93 (current_stack_pointer & ~(THREAD_SIZE - 1));
94}
95
96/* thread information allocation */
97#ifdef CONFIG_DEBUG_STACK_USAGE
98#define alloc_thread_info(tsk) ((struct thread_info *) \
99 __get_free_pages(GFP_KERNEL | __GFP_ZERO, get_order(THREAD_SIZE)))
100#else
101#define alloc_thread_info(tsk) ((struct thread_info *) \
102 __get_free_pages(GFP_KERNEL, get_order(THREAD_SIZE)))
103#endif
104
105#else /* !__ASSEMBLY__ */
106
107/* how to get the thread information struct from ASM */
108#define GET_THREAD_INFO(reg) \
109 movl $-THREAD_SIZE, reg; \
110 andl %esp, reg
111
112/* use this one if reg already contains %esp */
113#define GET_THREAD_INFO_WITH_ESP(reg) \
114 andl $-THREAD_SIZE, reg
115
116#endif
117
118/*
119 * thread information flags
120 * - these are process state flags that various
121 * assembly files may need to access
122 * - pending work-to-be-done flags are in LSW
123 * - other flags in MSW
124 */
125#define TIF_SYSCALL_TRACE 0 /* syscall trace active */
126#define TIF_SIGPENDING 1 /* signal pending */
127#define TIF_NEED_RESCHED 2 /* rescheduling necessary */
128#define TIF_SINGLESTEP 3 /* restore singlestep on return to
129 user mode */
130#define TIF_IRET 4 /* return with iret */
131#define TIF_SYSCALL_EMU 5 /* syscall emulation active */
132#define TIF_SYSCALL_AUDIT 6 /* syscall auditing active */
133#define TIF_SECCOMP 7 /* secure computing */
134#define TIF_HRTICK_RESCHED 9 /* reprogram hrtick timer */
135#define TIF_MEMDIE 16
136#define TIF_DEBUG 17 /* uses debug registers */
137#define TIF_IO_BITMAP 18 /* uses I/O bitmap */
138#define TIF_FREEZE 19 /* is freezing for suspend */
139#define TIF_NOTSC 20 /* TSC is not accessible in userland */
140#define TIF_FORCED_TF 21 /* true if TF in eflags artificially */
141#define TIF_DEBUGCTLMSR 22 /* uses thread_struct.debugctlmsr */
142#define TIF_DS_AREA_MSR 23 /* uses thread_struct.ds_area_msr */
143#define TIF_BTS_TRACE_TS 24 /* record scheduling event timestamps */
144
145#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
146#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
147#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
148#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP)
149#define _TIF_IRET (1 << TIF_IRET)
150#define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU)
151#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
152#define _TIF_SECCOMP (1 << TIF_SECCOMP)
153#define _TIF_HRTICK_RESCHED (1 << TIF_HRTICK_RESCHED)
154#define _TIF_DEBUG (1 << TIF_DEBUG)
155#define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP)
156#define _TIF_FREEZE (1 << TIF_FREEZE)
157#define _TIF_NOTSC (1 << TIF_NOTSC)
158#define _TIF_FORCED_TF (1 << TIF_FORCED_TF)
159#define _TIF_DEBUGCTLMSR (1 << TIF_DEBUGCTLMSR)
160#define _TIF_DS_AREA_MSR (1 << TIF_DS_AREA_MSR)
161#define _TIF_BTS_TRACE_TS (1 << TIF_BTS_TRACE_TS)
162
163/* work to do on interrupt/exception return */
164#define _TIF_WORK_MASK \
165 (0x0000FFFF & ~(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
166 _TIF_SECCOMP | _TIF_SYSCALL_EMU))
167/* work to do on any return to u-space */
168#define _TIF_ALLWORK_MASK (0x0000FFFF & ~_TIF_SECCOMP)
169
170/* flags to check in __switch_to() */
171#define _TIF_WORK_CTXSW \
172 (_TIF_IO_BITMAP | _TIF_NOTSC | _TIF_DEBUGCTLMSR | \
173 _TIF_DS_AREA_MSR | _TIF_BTS_TRACE_TS)
174#define _TIF_WORK_CTXSW_PREV _TIF_WORK_CTXSW
175#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW | _TIF_DEBUG)
176
177
178/*
179 * Thread-synchronous status.
180 *
181 * This is different from the flags in that nobody else
182 * ever touches our thread-synchronous status, so we don't
183 * have to worry about atomic accesses.
184 */
185#define TS_USEDFPU 0x0001 /* FPU was used by this task
186 this quantum (SMP) */
187#define TS_POLLING 0x0002 /* True if in idle loop
188 and not sleeping */
189#define TS_RESTORE_SIGMASK 0x0004 /* restore signal mask in do_signal() */
190
191#define tsk_is_polling(t) (task_thread_info(t)->status & TS_POLLING)
192
193#ifndef __ASSEMBLY__
194#define HAVE_SET_RESTORE_SIGMASK 1
195static inline void set_restore_sigmask(void)
196{
197 struct thread_info *ti = current_thread_info();
198 ti->status |= TS_RESTORE_SIGMASK;
199 set_bit(TIF_SIGPENDING, &ti->flags);
200}
201#endif /* !__ASSEMBLY__ */
202
203#endif /* __KERNEL__ */
204
205#endif /* _ASM_THREAD_INFO_H */
diff --git a/include/asm-x86/thread_info_64.h b/include/asm-x86/thread_info_64.h
deleted file mode 100644
index cb69f70abba1..000000000000
--- a/include/asm-x86/thread_info_64.h
+++ /dev/null
@@ -1,195 +0,0 @@
1/* thread_info.h: x86_64 low-level thread information
2 *
3 * Copyright (C) 2002 David Howells (dhowells@redhat.com)
4 * - Incorporating suggestions made by Linus Torvalds and Dave Miller
5 */
6
7#ifndef _ASM_THREAD_INFO_H
8#define _ASM_THREAD_INFO_H
9
10#ifdef __KERNEL__
11
12#include <asm/page.h>
13#include <asm/types.h>
14#include <asm/pda.h>
15
16/*
17 * low level task data that entry.S needs immediate access to
18 * - this struct should fit entirely inside of one cache line
19 * - this struct shares the supervisor stack pages
20 */
21#ifndef __ASSEMBLY__
22struct task_struct;
23struct exec_domain;
24#include <asm/processor.h>
25
26struct thread_info {
27 struct task_struct *task; /* main task structure */
28 struct exec_domain *exec_domain; /* execution domain */
29 __u32 flags; /* low level flags */
30 __u32 status; /* thread synchronous flags */
31 __u32 cpu; /* current CPU */
32 int preempt_count; /* 0 => preemptable,
33 <0 => BUG */
34 mm_segment_t addr_limit;
35 struct restart_block restart_block;
36#ifdef CONFIG_IA32_EMULATION
37 void __user *sysenter_return;
38#endif
39};
40#endif
41
42/*
43 * macros/functions for gaining access to the thread information structure
44 * preempt_count needs to be 1 initially, until the scheduler is functional.
45 */
46#ifndef __ASSEMBLY__
47#define INIT_THREAD_INFO(tsk) \
48{ \
49 .task = &tsk, \
50 .exec_domain = &default_exec_domain, \
51 .flags = 0, \
52 .cpu = 0, \
53 .preempt_count = 1, \
54 .addr_limit = KERNEL_DS, \
55 .restart_block = { \
56 .fn = do_no_restart_syscall, \
57 }, \
58}
59
60#define init_thread_info (init_thread_union.thread_info)
61#define init_stack (init_thread_union.stack)
62
63static inline struct thread_info *current_thread_info(void)
64{
65 struct thread_info *ti;
66 ti = (void *)(read_pda(kernelstack) + PDA_STACKOFFSET - THREAD_SIZE);
67 return ti;
68}
69
70/* do not use in interrupt context */
71static inline struct thread_info *stack_thread_info(void)
72{
73 struct thread_info *ti;
74 asm("andq %%rsp,%0; " : "=r" (ti) : "0" (~(THREAD_SIZE - 1)));
75 return ti;
76}
77
78/* thread information allocation */
79#ifdef CONFIG_DEBUG_STACK_USAGE
80#define THREAD_FLAGS (GFP_KERNEL | __GFP_ZERO)
81#else
82#define THREAD_FLAGS GFP_KERNEL
83#endif
84
85#define alloc_thread_info(tsk) \
86 ((struct thread_info *)__get_free_pages(THREAD_FLAGS, THREAD_ORDER))
87
88#else /* !__ASSEMBLY__ */
89
90/* how to get the thread information struct from ASM */
91#define GET_THREAD_INFO(reg) \
92 movq %gs:pda_kernelstack,reg ; \
93 subq $(THREAD_SIZE-PDA_STACKOFFSET),reg
94
95#endif
96
97/*
98 * thread information flags
99 * - these are process state flags that various assembly files
100 * may need to access
101 * - pending work-to-be-done flags are in LSW
102 * - other flags in MSW
103 * Warning: layout of LSW is hardcoded in entry.S
104 */
105#define TIF_SYSCALL_TRACE 0 /* syscall trace active */
106#define TIF_SIGPENDING 2 /* signal pending */
107#define TIF_NEED_RESCHED 3 /* rescheduling necessary */
108#define TIF_SINGLESTEP 4 /* reenable singlestep on user return*/
109#define TIF_IRET 5 /* force IRET */
110#define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */
111#define TIF_SECCOMP 8 /* secure computing */
112#define TIF_MCE_NOTIFY 10 /* notify userspace of an MCE */
113#define TIF_HRTICK_RESCHED 11 /* reprogram hrtick timer */
114/* 16 free */
115#define TIF_IA32 17 /* 32bit process */
116#define TIF_FORK 18 /* ret_from_fork */
117#define TIF_ABI_PENDING 19
118#define TIF_MEMDIE 20
119#define TIF_DEBUG 21 /* uses debug registers */
120#define TIF_IO_BITMAP 22 /* uses I/O bitmap */
121#define TIF_FREEZE 23 /* is freezing for suspend */
122#define TIF_FORCED_TF 24 /* true if TF in eflags artificially */
123#define TIF_DEBUGCTLMSR 25 /* uses thread_struct.debugctlmsr */
124#define TIF_DS_AREA_MSR 26 /* uses thread_struct.ds_area_msr */
125#define TIF_BTS_TRACE_TS 27 /* record scheduling event timestamps */
126#define TIF_NOTSC 28 /* TSC is not accessible in userland */
127
128#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
129#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
130#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP)
131#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
132#define _TIF_IRET (1 << TIF_IRET)
133#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
134#define _TIF_SECCOMP (1 << TIF_SECCOMP)
135#define _TIF_MCE_NOTIFY (1 << TIF_MCE_NOTIFY)
136#define _TIF_HRTICK_RESCHED (1 << TIF_HRTICK_RESCHED)
137#define _TIF_IA32 (1 << TIF_IA32)
138#define _TIF_FORK (1 << TIF_FORK)
139#define _TIF_ABI_PENDING (1 << TIF_ABI_PENDING)
140#define _TIF_DEBUG (1 << TIF_DEBUG)
141#define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP)
142#define _TIF_FREEZE (1 << TIF_FREEZE)
143#define _TIF_FORCED_TF (1 << TIF_FORCED_TF)
144#define _TIF_DEBUGCTLMSR (1 << TIF_DEBUGCTLMSR)
145#define _TIF_DS_AREA_MSR (1 << TIF_DS_AREA_MSR)
146#define _TIF_BTS_TRACE_TS (1 << TIF_BTS_TRACE_TS)
147#define _TIF_NOTSC (1 << TIF_NOTSC)
148
149/* work to do on interrupt/exception return */
150#define _TIF_WORK_MASK \
151 (0x0000FFFF & \
152 ~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP|_TIF_SECCOMP))
153/* work to do on any return to user space */
154#define _TIF_ALLWORK_MASK (0x0000FFFF & ~_TIF_SECCOMP)
155
156#define _TIF_DO_NOTIFY_MASK \
157 (_TIF_SIGPENDING|_TIF_SINGLESTEP|_TIF_MCE_NOTIFY|_TIF_HRTICK_RESCHED)
158
159/* flags to check in __switch_to() */
160#define _TIF_WORK_CTXSW \
161 (_TIF_IO_BITMAP|_TIF_DEBUGCTLMSR|_TIF_DS_AREA_MSR|_TIF_BTS_TRACE_TS|_TIF_NOTSC)
162#define _TIF_WORK_CTXSW_PREV _TIF_WORK_CTXSW
163#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG)
164
165#define PREEMPT_ACTIVE 0x10000000
166
167/*
168 * Thread-synchronous status.
169 *
170 * This is different from the flags in that nobody else
171 * ever touches our thread-synchronous status, so we don't
172 * have to worry about atomic accesses.
173 */
174#define TS_USEDFPU 0x0001 /* FPU was used by this task
175 this quantum (SMP) */
176#define TS_COMPAT 0x0002 /* 32bit syscall active */
177#define TS_POLLING 0x0004 /* true if in idle loop
178 and not sleeping */
179#define TS_RESTORE_SIGMASK 0x0008 /* restore signal mask in do_signal() */
180
181#define tsk_is_polling(t) (task_thread_info(t)->status & TS_POLLING)
182
183#ifndef __ASSEMBLY__
184#define HAVE_SET_RESTORE_SIGMASK 1
185static inline void set_restore_sigmask(void)
186{
187 struct thread_info *ti = current_thread_info();
188 ti->status |= TS_RESTORE_SIGMASK;
189 set_bit(TIF_SIGPENDING, &ti->flags);
190}
191#endif /* !__ASSEMBLY__ */
192
193#endif /* __KERNEL__ */
194
195#endif /* _ASM_THREAD_INFO_H */
diff --git a/include/asm-x86/time.h b/include/asm-x86/time.h
index bce72d7a958c..a17fa473e91d 100644
--- a/include/asm-x86/time.h
+++ b/include/asm-x86/time.h
@@ -56,4 +56,6 @@ static inline int native_set_wallclock(unsigned long nowtime)
56 56
57#endif /* CONFIG_PARAVIRT */ 57#endif /* CONFIG_PARAVIRT */
58 58
59extern unsigned long __init calibrate_cpu(void);
60
59#endif 61#endif
diff --git a/include/asm-x86/timer.h b/include/asm-x86/timer.h
index 4f6fcb050c11..fb2a4ddddf3d 100644
--- a/include/asm-x86/timer.h
+++ b/include/asm-x86/timer.h
@@ -7,14 +7,14 @@
7#define TICK_SIZE (tick_nsec / 1000) 7#define TICK_SIZE (tick_nsec / 1000)
8 8
9unsigned long long native_sched_clock(void); 9unsigned long long native_sched_clock(void);
10unsigned long native_calculate_cpu_khz(void); 10unsigned long native_calibrate_tsc(void);
11 11
12extern int timer_ack; 12extern int timer_ack;
13extern int no_timer_check; 13extern int no_timer_check;
14extern int recalibrate_cpu_khz(void); 14extern int recalibrate_cpu_khz(void);
15 15
16#ifndef CONFIG_PARAVIRT 16#ifndef CONFIG_PARAVIRT
17#define calculate_cpu_khz() native_calculate_cpu_khz() 17#define calibrate_tsc() native_calibrate_tsc()
18#endif 18#endif
19 19
20/* Accelerators for sched_clock() 20/* Accelerators for sched_clock()
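[Editor's note] The calculate_cpu_khz() -> calibrate_tsc() rename keeps the usual paravirt indirection pattern: callers are renamed once and both configurations keep working. A hedged note (the pv_time_ops routing is an assumption about the paravirt build, not shown in this hunk):

/* Illustrative: without CONFIG_PARAVIRT the hook is a direct macro,
 *     #define calibrate_tsc() native_calibrate_tsc()
 * while a paravirt build is expected to route calibrate_tsc() through
 * its time ops so a hypervisor can supply the TSC frequency itself.
 */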
diff --git a/include/asm-x86/topology.h b/include/asm-x86/topology.h
index dcf3f8131d6b..98e5f17ea856 100644
--- a/include/asm-x86/topology.h
+++ b/include/asm-x86/topology.h
@@ -35,79 +35,93 @@
35# endif 35# endif
36#endif 36#endif
37 37
38/* Node not present */
39#define NUMA_NO_NODE (-1)
40
38#ifdef CONFIG_NUMA 41#ifdef CONFIG_NUMA
39#include <linux/cpumask.h> 42#include <linux/cpumask.h>
40#include <asm/mpspec.h> 43#include <asm/mpspec.h>
41 44
42/* Mappings between logical cpu number and node number */
43#ifdef CONFIG_X86_32 45#ifdef CONFIG_X86_32
44extern int cpu_to_node_map[];
45#else
46/* Returns the number of the current Node. */
47#define numa_node_id() (early_cpu_to_node(raw_smp_processor_id()))
48#endif
49
50DECLARE_PER_CPU(int, x86_cpu_to_node_map);
51
52#ifdef CONFIG_SMP
53extern int x86_cpu_to_node_map_init[];
54extern void *x86_cpu_to_node_map_early_ptr;
55#else
56#define x86_cpu_to_node_map_early_ptr NULL
57#endif
58 46
47/* Mappings between node number and cpus on that node. */
59extern cpumask_t node_to_cpumask_map[]; 48extern cpumask_t node_to_cpumask_map[];
60 49
61#define NUMA_NO_NODE (-1) 50/* Mappings between logical cpu number and node number */
51extern int cpu_to_node_map[];
62 52
63/* Returns the number of the node containing CPU 'cpu' */ 53/* Returns the number of the node containing CPU 'cpu' */
64#ifdef CONFIG_X86_32
65#define early_cpu_to_node(cpu) cpu_to_node(cpu)
66static inline int cpu_to_node(int cpu) 54static inline int cpu_to_node(int cpu)
67{ 55{
68 return cpu_to_node_map[cpu]; 56 return cpu_to_node_map[cpu];
69} 57}
58#define early_cpu_to_node(cpu) cpu_to_node(cpu)
70 59
71#else /* CONFIG_X86_64 */ 60/* Returns a bitmask of CPUs on Node 'node'.
72 61 *
73#ifdef CONFIG_SMP 62 * Side note: this function creates the returned cpumask on the stack
74static inline int early_cpu_to_node(int cpu) 63 * so with a high NR_CPUS count, excessive stack space is used. The
64 * node_to_cpumask_ptr function should be used whenever possible.
65 */
66static inline cpumask_t node_to_cpumask(int node)
75{ 67{
76 int *cpu_to_node_map = x86_cpu_to_node_map_early_ptr; 68 return node_to_cpumask_map[node];
77
78 if (cpu_to_node_map)
79 return cpu_to_node_map[cpu];
80 else if (per_cpu_offset(cpu))
81 return per_cpu(x86_cpu_to_node_map, cpu);
82 else
83 return NUMA_NO_NODE;
84} 69}
85#else
86#define early_cpu_to_node(cpu) cpu_to_node(cpu)
87#endif
88 70
71#else /* CONFIG_X86_64 */
72
73/* Mappings between node number and cpus on that node. */
74extern cpumask_t *node_to_cpumask_map;
75
76/* Mappings between logical cpu number and node number */
77DECLARE_EARLY_PER_CPU(int, x86_cpu_to_node_map);
78
79/* Returns the number of the current Node. */
80#define numa_node_id() read_pda(nodenumber)
81
82#ifdef CONFIG_DEBUG_PER_CPU_MAPS
83extern int cpu_to_node(int cpu);
84extern int early_cpu_to_node(int cpu);
85extern cpumask_t *_node_to_cpumask_ptr(int node);
86extern cpumask_t node_to_cpumask(int node);
87
88#else /* !CONFIG_DEBUG_PER_CPU_MAPS */
89
90/* Returns the number of the node containing CPU 'cpu' */
89static inline int cpu_to_node(int cpu) 91static inline int cpu_to_node(int cpu)
90{ 92{
91#ifdef CONFIG_DEBUG_PER_CPU_MAPS
92 if (x86_cpu_to_node_map_early_ptr) {
93 printk("KERN_NOTICE cpu_to_node(%d): usage too early!\n",
94 (int)cpu);
95 dump_stack();
96 return ((int *)x86_cpu_to_node_map_early_ptr)[cpu];
97 }
98#endif
99 return per_cpu(x86_cpu_to_node_map, cpu); 93 return per_cpu(x86_cpu_to_node_map, cpu);
100} 94}
101 95
102#ifdef CONFIG_NUMA 96/* Same function but used if called before per_cpu areas are setup */
97static inline int early_cpu_to_node(int cpu)
98{
99 if (early_per_cpu_ptr(x86_cpu_to_node_map))
100 return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
101
102 return per_cpu(x86_cpu_to_node_map, cpu);
103}
103 104
104/* Returns a pointer to the cpumask of CPUs on Node 'node'. */ 105/* Returns a pointer to the cpumask of CPUs on Node 'node'. */
106static inline cpumask_t *_node_to_cpumask_ptr(int node)
107{
108 return &node_to_cpumask_map[node];
109}
110
111/* Returns a bitmask of CPUs on Node 'node'. */
112static inline cpumask_t node_to_cpumask(int node)
113{
114 return node_to_cpumask_map[node];
115}
116
117#endif /* !CONFIG_DEBUG_PER_CPU_MAPS */
118
119/* Replace default node_to_cpumask_ptr with optimized version */
105#define node_to_cpumask_ptr(v, node) \ 120#define node_to_cpumask_ptr(v, node) \
106 cpumask_t *v = &(node_to_cpumask_map[node]) 121 cpumask_t *v = _node_to_cpumask_ptr(node)
107 122
108#define node_to_cpumask_ptr_next(v, node) \ 123#define node_to_cpumask_ptr_next(v, node) \
109 v = &(node_to_cpumask_map[node]) 124 v = _node_to_cpumask_ptr(node)
110#endif
111 125
112#endif /* CONFIG_X86_64 */ 126#endif /* CONFIG_X86_64 */
113 127
@@ -117,20 +131,6 @@ static inline int cpu_to_node(int cpu)
117 */ 131 */
118#define parent_node(node) (node) 132#define parent_node(node) (node)
119 133
120/* Returns a bitmask of CPUs on Node 'node'. */
121static inline cpumask_t node_to_cpumask(int node)
122{
123 return node_to_cpumask_map[node];
124}
125
126/* Returns the number of the first CPU on Node 'node'. */
127static inline int node_to_first_cpu(int node)
128{
129 cpumask_t mask = node_to_cpumask(node);
130
131 return first_cpu(mask);
132}
133
134#define pcibus_to_node(bus) __pcibus_to_node(bus) 134#define pcibus_to_node(bus) __pcibus_to_node(bus)
135#define pcibus_to_cpumask(bus) __pcibus_to_cpumask(bus) 135#define pcibus_to_cpumask(bus) __pcibus_to_cpumask(bus)
136 136
@@ -180,12 +180,44 @@ extern int __node_distance(int, int);
180#define node_distance(a, b) __node_distance(a, b) 180#define node_distance(a, b) __node_distance(a, b)
181#endif 181#endif
182 182
183#else /* CONFIG_NUMA */ 183#else /* !CONFIG_NUMA */
184 184
185#define numa_node_id() 0
186#define cpu_to_node(cpu) 0
187#define early_cpu_to_node(cpu) 0
188
189static inline cpumask_t *_node_to_cpumask_ptr(int node)
190{
191 return &cpu_online_map;
192}
193static inline cpumask_t node_to_cpumask(int node)
194{
195 return cpu_online_map;
196}
197static inline int node_to_first_cpu(int node)
198{
199 return first_cpu(cpu_online_map);
200}
201
202/* Replace default node_to_cpumask_ptr with optimized version */
203#define node_to_cpumask_ptr(v, node) \
204 cpumask_t *v = _node_to_cpumask_ptr(node)
205
206#define node_to_cpumask_ptr_next(v, node) \
207 v = _node_to_cpumask_ptr(node)
185#endif 208#endif
186 209
187#include <asm-generic/topology.h> 210#include <asm-generic/topology.h>
188 211
212#ifdef CONFIG_NUMA
213/* Returns the number of the first CPU on Node 'node'. */
214static inline int node_to_first_cpu(int node)
215{
216 node_to_cpumask_ptr(mask, node);
217 return first_cpu(*mask);
218}
219#endif
220
189extern cpumask_t cpu_coregroup_map(int cpu); 221extern cpumask_t cpu_coregroup_map(int cpu);
190 222
191#ifdef ENABLE_TOPO_DEFINES 223#ifdef ENABLE_TOPO_DEFINES
@@ -193,6 +225,9 @@ extern cpumask_t cpu_coregroup_map(int cpu);
193#define topology_core_id(cpu) (cpu_data(cpu).cpu_core_id) 225#define topology_core_id(cpu) (cpu_data(cpu).cpu_core_id)
194#define topology_core_siblings(cpu) (per_cpu(cpu_core_map, cpu)) 226#define topology_core_siblings(cpu) (per_cpu(cpu_core_map, cpu))
195#define topology_thread_siblings(cpu) (per_cpu(cpu_sibling_map, cpu)) 227#define topology_thread_siblings(cpu) (per_cpu(cpu_sibling_map, cpu))
228
229/* indicates that pointers to the topology cpumask_t maps are valid */
230#define arch_provides_topology_pointers yes
196#endif 231#endif
197 232
198static inline void arch_fix_phys_package_id(int num, u32 slot) 233static inline void arch_fix_phys_package_id(int num, u32 slot)
@@ -220,4 +255,4 @@ static inline void set_mp_bus_to_node(int busnum, int node)
220} 255}
221#endif 256#endif
222 257
223#endif 258#endif /* _ASM_X86_TOPOLOGY_H */
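
The side note in the topology.h hunk above is the motivation for _node_to_cpumask_ptr(): node_to_cpumask() returns a cpumask_t by value, which with a large NR_CPUS burns considerable stack, while the pointer form copies nothing - the patch's own rewrite of node_to_first_cpu() is the canonical user. A usage sketch (hypothetical caller, assuming this era's for_each_cpu_mask() iterator):

        static int example_count_node_cpus(int node)
        {
                int cpu, n = 0;
                node_to_cpumask_ptr(mask, node);        /* declares cpumask_t *mask */

                for_each_cpu_mask(cpu, *mask)           /* no cpumask_t copy on stack */
                        n++;
                return n;
        }
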
diff --git a/include/asm-x86/tsc.h b/include/asm-x86/tsc.h
index 548873ab5fc1..cb6f6ee45b8f 100644
--- a/include/asm-x86/tsc.h
+++ b/include/asm-x86/tsc.h
@@ -48,7 +48,6 @@ static __always_inline cycles_t vget_cycles(void)
48extern void tsc_init(void); 48extern void tsc_init(void);
49extern void mark_tsc_unstable(char *reason); 49extern void mark_tsc_unstable(char *reason);
50extern int unsynchronized_tsc(void); 50extern int unsynchronized_tsc(void);
51extern void init_tsc_clocksource(void);
52int check_tsc_unstable(void); 51int check_tsc_unstable(void);
53 52
54/* 53/*
@@ -58,7 +57,6 @@ int check_tsc_unstable(void);
58extern void check_tsc_sync_source(int cpu); 57extern void check_tsc_sync_source(int cpu);
59extern void check_tsc_sync_target(void); 58extern void check_tsc_sync_target(void);
60 59
61extern void tsc_calibrate(void);
62extern int notsc_setup(char *); 60extern int notsc_setup(char *);
63 61
64#endif 62#endif
diff --git a/include/asm-x86/uaccess.h b/include/asm-x86/uaccess.h
index 9fefd2947e78..f6fa4d841bbc 100644
--- a/include/asm-x86/uaccess.h
+++ b/include/asm-x86/uaccess.h
@@ -1,5 +1,453 @@
1#ifndef _ASM_UACCESS_H_
2#define _ASM_UACCESS_H_
3/*
4 * User space memory access functions
5 */
6#include <linux/errno.h>
7#include <linux/compiler.h>
8#include <linux/thread_info.h>
9#include <linux/prefetch.h>
10#include <linux/string.h>
11#include <asm/asm.h>
12#include <asm/page.h>
13
14#define VERIFY_READ 0
15#define VERIFY_WRITE 1
16
17/*
18 * The fs value determines whether argument validity checking should be
19 * performed or not. If get_fs() == USER_DS, checking is performed, with
20 * get_fs() == KERNEL_DS, checking is bypassed.
21 *
22 * For historical reasons, these macros are grossly misnamed.
23 */
24
25#define MAKE_MM_SEG(s) ((mm_segment_t) { (s) })
26
27#define KERNEL_DS MAKE_MM_SEG(-1UL)
28#define USER_DS MAKE_MM_SEG(PAGE_OFFSET)
29
30#define get_ds() (KERNEL_DS)
31#define get_fs() (current_thread_info()->addr_limit)
32#define set_fs(x) (current_thread_info()->addr_limit = (x))
33
34#define segment_eq(a, b) ((a).seg == (b).seg)
35
36#define __addr_ok(addr) \
37 ((unsigned long __force)(addr) < \
38 (current_thread_info()->addr_limit.seg))
39
40/*
41 * Test whether a block of memory is a valid user space address.
42 * Returns 0 if the range is valid, nonzero otherwise.
43 *
44 * This is equivalent to the following test:
45 * (u33)addr + (u33)size >= (u33)current->addr_limit.seg (u65 for x86_64)
46 *
47 * This needs 33-bit (65-bit for x86_64) arithmetic. We have a carry...
48 */
49
50#define __range_not_ok(addr, size) \
51({ \
52 unsigned long flag, roksum; \
53 __chk_user_ptr(addr); \
54 asm("add %3,%1 ; sbb %0,%0 ; cmp %1,%4 ; sbb $0,%0" \
55 : "=&r" (flag), "=r" (roksum) \
56 : "1" (addr), "g" ((long)(size)), \
57 "rm" (current_thread_info()->addr_limit.seg)); \
58 flag; \
59})
60
61/**
62 * access_ok: - Checks if a user space pointer is valid
63 * @type: Type of access: %VERIFY_READ or %VERIFY_WRITE. Note that
64 * %VERIFY_WRITE is a superset of %VERIFY_READ - if it is safe
65 * to write to a block, it is always safe to read from it.
66 * @addr: User space pointer to start of block to check
67 * @size: Size of block to check
68 *
69 * Context: User context only. This function may sleep.
70 *
71 * Checks if a pointer to a block of memory in user space is valid.
72 *
73 * Returns true (nonzero) if the memory block may be valid, false (zero)
74 * if it is definitely invalid.
75 *
76 * Note that, depending on architecture, this function probably just
77 * checks that the pointer is in the user space range - after calling
78 * this function, memory access functions may still return -EFAULT.
79 */
80#define access_ok(type, addr, size) (likely(__range_not_ok(addr, size) == 0))
81
82/*
83 * The exception table consists of pairs of addresses: the first is the
84 * address of an instruction that is allowed to fault, and the second is
85 * the address at which the program should continue. No registers are
86 * modified, so it is entirely up to the continuation code to figure out
87 * what to do.
88 *
89 * All the routines below use bits of fixup code that are out of line
90 * with the main instruction path. This means when everything is well,
91 * we don't even have to jump over them. Further, they do not intrude
92 * on our cache or tlb entries.
93 */
94
95struct exception_table_entry {
96 unsigned long insn, fixup;
97};
98
99extern int fixup_exception(struct pt_regs *regs);
100
101/*
102 * These are the main single-value transfer routines. They automatically
103 * use the right size if we just have the right pointer type.
104 *
105 * This gets kind of ugly. We want to return _two_ values in "get_user()"
106 * and yet we don't want to do any pointers, because that is too much
107 * of a performance impact. Thus we have a few rather ugly macros here,
108 * and hide all the ugliness from the user.
109 *
110 * The "__xxx" versions of the user access functions are versions that
111 * do not verify the address space, that must have been done previously
112 * with a separate "access_ok()" call (this is used when we do multiple
113 * accesses to the same area of user memory).
114 */
115
116extern int __get_user_1(void);
117extern int __get_user_2(void);
118extern int __get_user_4(void);
119extern int __get_user_8(void);
120extern int __get_user_bad(void);
121
122#define __get_user_x(size, ret, x, ptr) \
123 asm volatile("call __get_user_" #size \
124 : "=a" (ret),"=d" (x) \
125 : "0" (ptr))
126
127/* Careful: we have to cast the result to the type of the pointer
128 * for sign reasons */
129
130/**
131 * get_user: - Get a simple variable from user space.
132 * @x: Variable to store result.
133 * @ptr: Source address, in user space.
134 *
135 * Context: User context only. This function may sleep.
136 *
137 * This macro copies a single simple variable from user space to kernel
138 * space. It supports simple types like char and int, but not larger
139 * data types like structures or arrays.
140 *
141 * @ptr must have pointer-to-simple-variable type, and the result of
142 * dereferencing @ptr must be assignable to @x without a cast.
143 *
144 * Returns zero on success, or -EFAULT on error.
145 * On error, the variable @x is set to zero.
146 */
147#ifdef CONFIG_X86_32
148#define __get_user_8(__ret_gu, __val_gu, ptr) \
149 __get_user_x(X, __ret_gu, __val_gu, ptr)
150#else
151#define __get_user_8(__ret_gu, __val_gu, ptr) \
152 __get_user_x(8, __ret_gu, __val_gu, ptr)
153#endif
154
155#define get_user(x, ptr) \
156({ \
157 int __ret_gu; \
158 unsigned long __val_gu; \
159 __chk_user_ptr(ptr); \
160 switch (sizeof(*(ptr))) { \
161 case 1: \
162 __get_user_x(1, __ret_gu, __val_gu, ptr); \
163 break; \
164 case 2: \
165 __get_user_x(2, __ret_gu, __val_gu, ptr); \
166 break; \
167 case 4: \
168 __get_user_x(4, __ret_gu, __val_gu, ptr); \
169 break; \
170 case 8: \
171 __get_user_8(__ret_gu, __val_gu, ptr); \
172 break; \
173 default: \
174 __get_user_x(X, __ret_gu, __val_gu, ptr); \
175 break; \
176 } \
177 (x) = (__typeof__(*(ptr)))__val_gu; \
178 __ret_gu; \
179})
180
181#define __put_user_x(size, x, ptr, __ret_pu) \
182 asm volatile("call __put_user_" #size : "=a" (__ret_pu) \
183 :"0" ((typeof(*(ptr)))(x)), "c" (ptr) : "ebx")
184
185
186
187#ifdef CONFIG_X86_32
188#define __put_user_u64(x, addr, err) \
189 asm volatile("1: movl %%eax,0(%2)\n" \
190 "2: movl %%edx,4(%2)\n" \
191 "3:\n" \
192 ".section .fixup,\"ax\"\n" \
193 "4: movl %3,%0\n" \
194 " jmp 3b\n" \
195 ".previous\n" \
196 _ASM_EXTABLE(1b, 4b) \
197 _ASM_EXTABLE(2b, 4b) \
198 : "=r" (err) \
199 : "A" (x), "r" (addr), "i" (-EFAULT), "0" (err))
200
201#define __put_user_x8(x, ptr, __ret_pu) \
202 asm volatile("call __put_user_8" : "=a" (__ret_pu) \
203 : "A" ((typeof(*(ptr)))(x)), "c" (ptr) : "ebx")
204#else
205#define __put_user_u64(x, ptr, retval) \
206 __put_user_asm(x, ptr, retval, "q", "", "Zr", -EFAULT)
207#define __put_user_x8(x, ptr, __ret_pu) __put_user_x(8, x, ptr, __ret_pu)
208#endif
209
210extern void __put_user_bad(void);
211
212/*
213 * Strange magic calling convention: pointer in %ecx,
214 * value in %eax(:%edx), return value in %eax. clobbers %rbx
215 */
216extern void __put_user_1(void);
217extern void __put_user_2(void);
218extern void __put_user_4(void);
219extern void __put_user_8(void);
220
221#ifdef CONFIG_X86_WP_WORKS_OK
222
223/**
224 * put_user: - Write a simple value into user space.
225 * @x: Value to copy to user space.
226 * @ptr: Destination address, in user space.
227 *
228 * Context: User context only. This function may sleep.
229 *
230 * This macro copies a single simple value from kernel space to user
231 * space. It supports simple types like char and int, but not larger
232 * data types like structures or arrays.
233 *
234 * @ptr must have pointer-to-simple-variable type, and @x must be assignable
235 * to the result of dereferencing @ptr.
236 *
237 * Returns zero on success, or -EFAULT on error.
238 */
239#define put_user(x, ptr) \
240({ \
241 int __ret_pu; \
242 __typeof__(*(ptr)) __pu_val; \
243 __chk_user_ptr(ptr); \
244 __pu_val = x; \
245 switch (sizeof(*(ptr))) { \
246 case 1: \
247 __put_user_x(1, __pu_val, ptr, __ret_pu); \
248 break; \
249 case 2: \
250 __put_user_x(2, __pu_val, ptr, __ret_pu); \
251 break; \
252 case 4: \
253 __put_user_x(4, __pu_val, ptr, __ret_pu); \
254 break; \
255 case 8: \
256 __put_user_x8(__pu_val, ptr, __ret_pu); \
257 break; \
258 default: \
259 __put_user_x(X, __pu_val, ptr, __ret_pu); \
260 break; \
261 } \
262 __ret_pu; \
263})
264
265#define __put_user_size(x, ptr, size, retval, errret) \
266do { \
267 retval = 0; \
268 __chk_user_ptr(ptr); \
269 switch (size) { \
270 case 1: \
271 __put_user_asm(x, ptr, retval, "b", "b", "iq", errret); \
272 break; \
273 case 2: \
274 __put_user_asm(x, ptr, retval, "w", "w", "ir", errret); \
275 break; \
276 case 4: \
277 __put_user_asm(x, ptr, retval, "l", "k", "ir", errret);\
278 break; \
279 case 8: \
280 __put_user_u64((__typeof__(*ptr))(x), ptr, retval); \
281 break; \
282 default: \
283 __put_user_bad(); \
284 } \
285} while (0)
286
287#else
288
289#define __put_user_size(x, ptr, size, retval, errret) \
290do { \
291 __typeof__(*(ptr))__pus_tmp = x; \
292 retval = 0; \
293 \
294 if (unlikely(__copy_to_user_ll(ptr, &__pus_tmp, size) != 0)) \
295 retval = errret; \
296} while (0)
297
298#define put_user(x, ptr) \
299({ \
300 int __ret_pu; \
301 __typeof__(*(ptr))__pus_tmp = x; \
302 __ret_pu = 0; \
303 if (unlikely(__copy_to_user_ll(ptr, &__pus_tmp, \
304 sizeof(*(ptr))) != 0)) \
305 __ret_pu = -EFAULT; \
306 __ret_pu; \
307})
308#endif
309
310#ifdef CONFIG_X86_32
311#define __get_user_asm_u64(x, ptr, retval, errret) (x) = __get_user_bad()
312#else
313#define __get_user_asm_u64(x, ptr, retval, errret) \
314 __get_user_asm(x, ptr, retval, "q", "", "=r", errret)
315#endif
316
317#define __get_user_size(x, ptr, size, retval, errret) \
318do { \
319 retval = 0; \
320 __chk_user_ptr(ptr); \
321 switch (size) { \
322 case 1: \
323 __get_user_asm(x, ptr, retval, "b", "b", "=q", errret); \
324 break; \
325 case 2: \
326 __get_user_asm(x, ptr, retval, "w", "w", "=r", errret); \
327 break; \
328 case 4: \
329 __get_user_asm(x, ptr, retval, "l", "k", "=r", errret); \
330 break; \
331 case 8: \
332 __get_user_asm_u64(x, ptr, retval, errret); \
333 break; \
334 default: \
335 (x) = __get_user_bad(); \
336 } \
337} while (0)
338
339#define __get_user_asm(x, addr, err, itype, rtype, ltype, errret) \
340 asm volatile("1: mov"itype" %2,%"rtype"1\n" \
341 "2:\n" \
342 ".section .fixup,\"ax\"\n" \
343 "3: mov %3,%0\n" \
344 " xor"itype" %"rtype"1,%"rtype"1\n" \
345 " jmp 2b\n" \
346 ".previous\n" \
347 _ASM_EXTABLE(1b, 3b) \
348 : "=r" (err), ltype(x) \
349 : "m" (__m(addr)), "i" (errret), "0" (err))
350
351#define __put_user_nocheck(x, ptr, size) \
352({ \
353 long __pu_err; \
354 __put_user_size((x), (ptr), (size), __pu_err, -EFAULT); \
355 __pu_err; \
356})
357
358#define __get_user_nocheck(x, ptr, size) \
359({ \
360 long __gu_err; \
361 unsigned long __gu_val; \
362 __get_user_size(__gu_val, (ptr), (size), __gu_err, -EFAULT); \
363 (x) = (__force __typeof__(*(ptr)))__gu_val; \
364 __gu_err; \
365})
366
367/* FIXME: this hack is definitely wrong -AK */
368struct __large_struct { unsigned long buf[100]; };
369#define __m(x) (*(struct __large_struct __user *)(x))
370
371/*
372 * Tell gcc we read from memory instead of writing: this is because
373 * we do not write to any memory gcc knows about, so there are no
374 * aliasing issues.
375 */
376#define __put_user_asm(x, addr, err, itype, rtype, ltype, errret) \
377 asm volatile("1: mov"itype" %"rtype"1,%2\n" \
378 "2:\n" \
379 ".section .fixup,\"ax\"\n" \
380 "3: mov %3,%0\n" \
381 " jmp 2b\n" \
382 ".previous\n" \
383 _ASM_EXTABLE(1b, 3b) \
384 : "=r"(err) \
385 : ltype(x), "m" (__m(addr)), "i" (errret), "0" (err))
386/**
387 * __get_user: - Get a simple variable from user space, with less checking.
388 * @x: Variable to store result.
389 * @ptr: Source address, in user space.
390 *
391 * Context: User context only. This function may sleep.
392 *
393 * This macro copies a single simple variable from user space to kernel
394 * space. It supports simple types like char and int, but not larger
395 * data types like structures or arrays.
396 *
397 * @ptr must have pointer-to-simple-variable type, and the result of
398 * dereferencing @ptr must be assignable to @x without a cast.
399 *
400 * Caller must check the pointer with access_ok() before calling this
401 * function.
402 *
403 * Returns zero on success, or -EFAULT on error.
404 * On error, the variable @x is set to zero.
405 */
406
407#define __get_user(x, ptr) \
408 __get_user_nocheck((x), (ptr), sizeof(*(ptr)))
409/**
410 * __put_user: - Write a simple value into user space, with less checking.
411 * @x: Value to copy to user space.
412 * @ptr: Destination address, in user space.
413 *
414 * Context: User context only. This function may sleep.
415 *
416 * This macro copies a single simple value from kernel space to user
417 * space. It supports simple types like char and int, but not larger
418 * data types like structures or arrays.
419 *
420 * @ptr must have pointer-to-simple-variable type, and @x must be assignable
421 * to the result of dereferencing @ptr.
422 *
423 * Caller must check the pointer with access_ok() before calling this
424 * function.
425 *
426 * Returns zero on success, or -EFAULT on error.
427 */
428
429#define __put_user(x, ptr) \
430 __put_user_nocheck((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)))
431
432#define __get_user_unaligned __get_user
433#define __put_user_unaligned __put_user
434
435/*
436 * movsl can be slow when source and dest are not both 8-byte aligned
437 */
438#ifdef CONFIG_X86_INTEL_USERCOPY
439extern struct movsl_mask {
440 int mask;
441} ____cacheline_aligned_in_smp movsl_mask;
442#endif
443
444#define ARCH_HAS_NOCACHE_UACCESS 1
445
1#ifdef CONFIG_X86_32 446#ifdef CONFIG_X86_32
2# include "uaccess_32.h" 447# include "uaccess_32.h"
3#else 448#else
449# define ARCH_HAS_SEARCH_EXTABLE
4# include "uaccess_64.h" 450# include "uaccess_64.h"
5#endif 451#endif
452
453#endif
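
The asm in __range_not_ok() above implements the 33-bit (65-bit on x86_64) test described in its comment: add addr and size, fold the carry into a flag, then compare the sum against addr_limit and fold that borrow in as well. A conceptual C equivalent (a sketch for exposition, not the in-tree code):

        /* nonzero return means the range is NOT ok */
        static inline unsigned long example_range_not_ok(unsigned long addr,
                                                         unsigned long size,
                                                         unsigned long limit)
        {
                unsigned long sum = addr + size;

                if (sum < addr)         /* carry out: addr + size wrapped */
                        return 1;
                return sum > limit;     /* block ends beyond addr_limit */
        }
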
diff --git a/include/asm-x86/uaccess_32.h b/include/asm-x86/uaccess_32.h
index 8e7595c1f34e..6fdef39a0bcb 100644
--- a/include/asm-x86/uaccess_32.h
+++ b/include/asm-x86/uaccess_32.h
@@ -11,426 +11,6 @@
11#include <asm/asm.h> 11#include <asm/asm.h>
12#include <asm/page.h> 12#include <asm/page.h>
13 13
14#define VERIFY_READ 0
15#define VERIFY_WRITE 1
16
17/*
18 * The fs value determines whether argument validity checking should be
19 * performed or not. If get_fs() == USER_DS, checking is performed, with
20 * get_fs() == KERNEL_DS, checking is bypassed.
21 *
22 * For historical reasons, these macros are grossly misnamed.
23 */
24
25#define MAKE_MM_SEG(s) ((mm_segment_t) { (s) })
26
27
28#define KERNEL_DS MAKE_MM_SEG(0xFFFFFFFFUL)
29#define USER_DS MAKE_MM_SEG(PAGE_OFFSET)
30
31#define get_ds() (KERNEL_DS)
32#define get_fs() (current_thread_info()->addr_limit)
33#define set_fs(x) (current_thread_info()->addr_limit = (x))
34
35#define segment_eq(a, b) ((a).seg == (b).seg)
36
37/*
38 * movsl can be slow when source and dest are not both 8-byte aligned
39 */
40#ifdef CONFIG_X86_INTEL_USERCOPY
41extern struct movsl_mask {
42 int mask;
43} ____cacheline_aligned_in_smp movsl_mask;
44#endif
45
46#define __addr_ok(addr) \
47 ((unsigned long __force)(addr) < \
48 (current_thread_info()->addr_limit.seg))
49
50/*
51 * Test whether a block of memory is a valid user space address.
52 * Returns 0 if the range is valid, nonzero otherwise.
53 *
54 * This is equivalent to the following test:
55 * (u33)addr + (u33)size >= (u33)current->addr_limit.seg
56 *
57 * This needs 33-bit arithmetic. We have a carry...
58 */
59#define __range_ok(addr, size) \
60({ \
61 unsigned long flag, roksum; \
62 __chk_user_ptr(addr); \
63 asm("addl %3,%1 ; sbbl %0,%0; cmpl %1,%4; sbbl $0,%0" \
64 :"=&r" (flag), "=r" (roksum) \
65 :"1" (addr), "g" ((int)(size)), \
66 "rm" (current_thread_info()->addr_limit.seg)); \
67 flag; \
68})
69
70/**
71 * access_ok: - Checks if a user space pointer is valid
72 * @type: Type of access: %VERIFY_READ or %VERIFY_WRITE. Note that
73 * %VERIFY_WRITE is a superset of %VERIFY_READ - if it is safe
74 * to write to a block, it is always safe to read from it.
75 * @addr: User space pointer to start of block to check
76 * @size: Size of block to check
77 *
78 * Context: User context only. This function may sleep.
79 *
80 * Checks if a pointer to a block of memory in user space is valid.
81 *
82 * Returns true (nonzero) if the memory block may be valid, false (zero)
83 * if it is definitely invalid.
84 *
85 * Note that, depending on architecture, this function probably just
86 * checks that the pointer is in the user space range - after calling
87 * this function, memory access functions may still return -EFAULT.
88 */
89#define access_ok(type, addr, size) (likely(__range_ok(addr, size) == 0))
90
91/*
92 * The exception table consists of pairs of addresses: the first is the
93 * address of an instruction that is allowed to fault, and the second is
94 * the address at which the program should continue. No registers are
95 * modified, so it is entirely up to the continuation code to figure out
96 * what to do.
97 *
98 * All the routines below use bits of fixup code that are out of line
99 * with the main instruction path. This means when everything is well,
100 * we don't even have to jump over them. Further, they do not intrude
101 * on our cache or tlb entries.
102 */
103
104struct exception_table_entry {
105 unsigned long insn, fixup;
106};
107
108extern int fixup_exception(struct pt_regs *regs);
109
110/*
111 * These are the main single-value transfer routines. They automatically
112 * use the right size if we just have the right pointer type.
113 *
114 * This gets kind of ugly. We want to return _two_ values in "get_user()"
115 * and yet we don't want to do any pointers, because that is too much
116 * of a performance impact. Thus we have a few rather ugly macros here,
117 * and hide all the ugliness from the user.
118 *
119 * The "__xxx" versions of the user access functions are versions that
120 * do not verify the address space, that must have been done previously
121 * with a separate "access_ok()" call (this is used when we do multiple
122 * accesses to the same area of user memory).
123 */
124
125extern void __get_user_1(void);
126extern void __get_user_2(void);
127extern void __get_user_4(void);
128
129#define __get_user_x(size, ret, x, ptr) \
130 asm volatile("call __get_user_" #size \
131 :"=a" (ret),"=d" (x) \
132 :"0" (ptr))
133
134
135/* Careful: we have to cast the result to the type of the pointer
136 * for sign reasons */
137
138/**
139 * get_user: - Get a simple variable from user space.
140 * @x: Variable to store result.
141 * @ptr: Source address, in user space.
142 *
143 * Context: User context only. This function may sleep.
144 *
145 * This macro copies a single simple variable from user space to kernel
146 * space. It supports simple types like char and int, but not larger
147 * data types like structures or arrays.
148 *
149 * @ptr must have pointer-to-simple-variable type, and the result of
150 * dereferencing @ptr must be assignable to @x without a cast.
151 *
152 * Returns zero on success, or -EFAULT on error.
153 * On error, the variable @x is set to zero.
154 */
155#define get_user(x, ptr) \
156({ \
157 int __ret_gu; \
158 unsigned long __val_gu; \
159 __chk_user_ptr(ptr); \
160 switch (sizeof(*(ptr))) { \
161 case 1: \
162 __get_user_x(1, __ret_gu, __val_gu, ptr); \
163 break; \
164 case 2: \
165 __get_user_x(2, __ret_gu, __val_gu, ptr); \
166 break; \
167 case 4: \
168 __get_user_x(4, __ret_gu, __val_gu, ptr); \
169 break; \
170 default: \
171 __get_user_x(X, __ret_gu, __val_gu, ptr); \
172 break; \
173 } \
174 (x) = (__typeof__(*(ptr)))__val_gu; \
175 __ret_gu; \
176})
177
178extern void __put_user_bad(void);
179
180/*
181 * Strange magic calling convention: pointer in %ecx,
182 * value in %eax(:%edx), return value in %eax, no clobbers.
183 */
184extern void __put_user_1(void);
185extern void __put_user_2(void);
186extern void __put_user_4(void);
187extern void __put_user_8(void);
188
189#define __put_user_1(x, ptr) \
190 asm volatile("call __put_user_1" : "=a" (__ret_pu) \
191 : "0" ((typeof(*(ptr)))(x)), "c" (ptr))
192
193#define __put_user_2(x, ptr) \
194 asm volatile("call __put_user_2" : "=a" (__ret_pu) \
195 : "0" ((typeof(*(ptr)))(x)), "c" (ptr))
196
197#define __put_user_4(x, ptr) \
198 asm volatile("call __put_user_4" : "=a" (__ret_pu) \
199 : "0" ((typeof(*(ptr)))(x)), "c" (ptr))
200
201#define __put_user_8(x, ptr) \
202 asm volatile("call __put_user_8" : "=a" (__ret_pu) \
203 : "A" ((typeof(*(ptr)))(x)), "c" (ptr))
204
205#define __put_user_X(x, ptr) \
206 asm volatile("call __put_user_X" : "=a" (__ret_pu) \
207 : "c" (ptr))
208
209/**
210 * put_user: - Write a simple value into user space.
211 * @x: Value to copy to user space.
212 * @ptr: Destination address, in user space.
213 *
214 * Context: User context only. This function may sleep.
215 *
216 * This macro copies a single simple value from kernel space to user
217 * space. It supports simple types like char and int, but not larger
218 * data types like structures or arrays.
219 *
220 * @ptr must have pointer-to-simple-variable type, and @x must be assignable
221 * to the result of dereferencing @ptr.
222 *
223 * Returns zero on success, or -EFAULT on error.
224 */
225#ifdef CONFIG_X86_WP_WORKS_OK
226
227#define put_user(x, ptr) \
228({ \
229 int __ret_pu; \
230 __typeof__(*(ptr)) __pu_val; \
231 __chk_user_ptr(ptr); \
232 __pu_val = x; \
233 switch (sizeof(*(ptr))) { \
234 case 1: \
235 __put_user_1(__pu_val, ptr); \
236 break; \
237 case 2: \
238 __put_user_2(__pu_val, ptr); \
239 break; \
240 case 4: \
241 __put_user_4(__pu_val, ptr); \
242 break; \
243 case 8: \
244 __put_user_8(__pu_val, ptr); \
245 break; \
246 default: \
247 __put_user_X(__pu_val, ptr); \
248 break; \
249 } \
250 __ret_pu; \
251})
252
253#else
254#define put_user(x, ptr) \
255({ \
256 int __ret_pu; \
257 __typeof__(*(ptr))__pus_tmp = x; \
258 __ret_pu = 0; \
259 if (unlikely(__copy_to_user_ll(ptr, &__pus_tmp, \
260 sizeof(*(ptr))) != 0)) \
261 __ret_pu = -EFAULT; \
262 __ret_pu; \
263})
264
265
266#endif
267
268/**
269 * __get_user: - Get a simple variable from user space, with less checking.
270 * @x: Variable to store result.
271 * @ptr: Source address, in user space.
272 *
273 * Context: User context only. This function may sleep.
274 *
275 * This macro copies a single simple variable from user space to kernel
276 * space. It supports simple types like char and int, but not larger
277 * data types like structures or arrays.
278 *
279 * @ptr must have pointer-to-simple-variable type, and the result of
280 * dereferencing @ptr must be assignable to @x without a cast.
281 *
282 * Caller must check the pointer with access_ok() before calling this
283 * function.
284 *
285 * Returns zero on success, or -EFAULT on error.
286 * On error, the variable @x is set to zero.
287 */
288#define __get_user(x, ptr) \
289 __get_user_nocheck((x), (ptr), sizeof(*(ptr)))
290
291
292/**
293 * __put_user: - Write a simple value into user space, with less checking.
294 * @x: Value to copy to user space.
295 * @ptr: Destination address, in user space.
296 *
297 * Context: User context only. This function may sleep.
298 *
299 * This macro copies a single simple value from kernel space to user
300 * space. It supports simple types like char and int, but not larger
301 * data types like structures or arrays.
302 *
303 * @ptr must have pointer-to-simple-variable type, and @x must be assignable
304 * to the result of dereferencing @ptr.
305 *
306 * Caller must check the pointer with access_ok() before calling this
307 * function.
308 *
309 * Returns zero on success, or -EFAULT on error.
310 */
311#define __put_user(x, ptr) \
312 __put_user_nocheck((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)))
313
314#define __put_user_nocheck(x, ptr, size) \
315({ \
316 long __pu_err; \
317 __put_user_size((x), (ptr), (size), __pu_err, -EFAULT); \
318 __pu_err; \
319})
320
321
322#define __put_user_u64(x, addr, err) \
323 asm volatile("1: movl %%eax,0(%2)\n" \
324 "2: movl %%edx,4(%2)\n" \
325 "3:\n" \
326 ".section .fixup,\"ax\"\n" \
327 "4: movl %3,%0\n" \
328 " jmp 3b\n" \
329 ".previous\n" \
330 _ASM_EXTABLE(1b, 4b) \
331 _ASM_EXTABLE(2b, 4b) \
332 : "=r" (err) \
333 : "A" (x), "r" (addr), "i" (-EFAULT), "0" (err))
334
335#ifdef CONFIG_X86_WP_WORKS_OK
336
337#define __put_user_size(x, ptr, size, retval, errret) \
338do { \
339 retval = 0; \
340 __chk_user_ptr(ptr); \
341 switch (size) { \
342 case 1: \
343 __put_user_asm(x, ptr, retval, "b", "b", "iq", errret); \
344 break; \
345 case 2: \
346 __put_user_asm(x, ptr, retval, "w", "w", "ir", errret); \
347 break; \
348 case 4: \
349 __put_user_asm(x, ptr, retval, "l", "", "ir", errret); \
350 break; \
351 case 8: \
352 __put_user_u64((__typeof__(*ptr))(x), ptr, retval); \
353 break; \
354 default: \
355 __put_user_bad(); \
356 } \
357} while (0)
358
359#else
360
361#define __put_user_size(x, ptr, size, retval, errret) \
362do { \
363 __typeof__(*(ptr))__pus_tmp = x; \
364 retval = 0; \
365 \
366 if (unlikely(__copy_to_user_ll(ptr, &__pus_tmp, size) != 0)) \
367 retval = errret; \
368} while (0)
369
370#endif
371struct __large_struct { unsigned long buf[100]; };
372#define __m(x) (*(struct __large_struct __user *)(x))
373
374/*
375 * Tell gcc we read from memory instead of writing: this is because
376 * we do not write to any memory gcc knows about, so there are no
377 * aliasing issues.
378 */
379#define __put_user_asm(x, addr, err, itype, rtype, ltype, errret) \
380 asm volatile("1: mov"itype" %"rtype"1,%2\n" \
381 "2:\n" \
382 ".section .fixup,\"ax\"\n" \
383 "3: movl %3,%0\n" \
384 " jmp 2b\n" \
385 ".previous\n" \
386 _ASM_EXTABLE(1b, 3b) \
387 : "=r"(err) \
388 : ltype (x), "m" (__m(addr)), "i" (errret), "0" (err))
389
390
391#define __get_user_nocheck(x, ptr, size) \
392({ \
393 long __gu_err; \
394 unsigned long __gu_val; \
395 __get_user_size(__gu_val, (ptr), (size), __gu_err, -EFAULT); \
396 (x) = (__typeof__(*(ptr)))__gu_val; \
397 __gu_err; \
398})
399
400extern long __get_user_bad(void);
401
402#define __get_user_size(x, ptr, size, retval, errret) \
403do { \
404 retval = 0; \
405 __chk_user_ptr(ptr); \
406 switch (size) { \
407 case 1: \
408 __get_user_asm(x, ptr, retval, "b", "b", "=q", errret); \
409 break; \
410 case 2: \
411 __get_user_asm(x, ptr, retval, "w", "w", "=r", errret); \
412 break; \
413 case 4: \
414 __get_user_asm(x, ptr, retval, "l", "", "=r", errret); \
415 break; \
416 default: \
417 (x) = __get_user_bad(); \
418 } \
419} while (0)
420
421#define __get_user_asm(x, addr, err, itype, rtype, ltype, errret) \
422 asm volatile("1: mov"itype" %2,%"rtype"1\n" \
423 "2:\n" \
424 ".section .fixup,\"ax\"\n" \
425 "3: movl %3,%0\n" \
426 " xor"itype" %"rtype"1,%"rtype"1\n" \
427 " jmp 2b\n" \
428 ".previous\n" \
429 _ASM_EXTABLE(1b, 3b) \
430 : "=r" (err), ltype (x) \
431 : "m" (__m(addr)), "i" (errret), "0" (err))
432
433
434unsigned long __must_check __copy_to_user_ll 14unsigned long __must_check __copy_to_user_ll
435 (void __user *to, const void *from, unsigned long n); 15 (void __user *to, const void *from, unsigned long n);
436unsigned long __must_check __copy_from_user_ll 16unsigned long __must_check __copy_from_user_ll
@@ -576,8 +156,6 @@ __copy_from_user(void *to, const void __user *from, unsigned long n)
576 return __copy_from_user_ll(to, from, n); 156 return __copy_from_user_ll(to, from, n);
577} 157}
578 158
579#define ARCH_HAS_NOCACHE_UACCESS
580
581static __always_inline unsigned long __copy_from_user_nocache(void *to, 159static __always_inline unsigned long __copy_from_user_nocache(void *to,
582 const void __user *from, unsigned long n) 160 const void __user *from, unsigned long n)
583{ 161{
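
With the bulk of uaccess_32.h now hoisted into the common header, the contract the doc comments spell out is unchanged: get_user()/put_user() validate the pointer themselves and return 0 or -EFAULT, while the __get_user()/__put_user() forms require a prior access_ok(). A hypothetical usage sketch:

        /* read one int from, then write one back to, a user pointer */
        static int example_echo_user_int(int __user *uptr)
        {
                int val;

                if (get_user(val, uptr))        /* checks uptr itself */
                        return -EFAULT;
                if (put_user(val + 1, uptr))
                        return -EFAULT;
                return 0;
        }
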
diff --git a/include/asm-x86/uaccess_64.h b/include/asm-x86/uaccess_64.h
index b8a2f4339903..515d4dce96b5 100644
--- a/include/asm-x86/uaccess_64.h
+++ b/include/asm-x86/uaccess_64.h
@@ -9,265 +9,6 @@
9#include <linux/prefetch.h> 9#include <linux/prefetch.h>
10#include <asm/page.h> 10#include <asm/page.h>
11 11
12#define VERIFY_READ 0
13#define VERIFY_WRITE 1
14
15/*
16 * The fs value determines whether argument validity checking should be
17 * performed or not. If get_fs() == USER_DS, checking is performed, with
18 * get_fs() == KERNEL_DS, checking is bypassed.
19 *
20 * For historical reasons, these macros are grossly misnamed.
21 */
22
23#define MAKE_MM_SEG(s) ((mm_segment_t) { (s) })
24
25#define KERNEL_DS MAKE_MM_SEG(0xFFFFFFFFFFFFFFFFUL)
26#define USER_DS MAKE_MM_SEG(PAGE_OFFSET)
27
28#define get_ds() (KERNEL_DS)
29#define get_fs() (current_thread_info()->addr_limit)
30#define set_fs(x) (current_thread_info()->addr_limit = (x))
31
32#define segment_eq(a, b) ((a).seg == (b).seg)
33
34#define __addr_ok(addr) (!((unsigned long)(addr) & \
35 (current_thread_info()->addr_limit.seg)))
36
37/*
38 * Uhhuh, this needs 65-bit arithmetic. We have a carry..
39 */
40#define __range_not_ok(addr, size) \
41({ \
42 unsigned long flag, roksum; \
43 __chk_user_ptr(addr); \
44 asm("# range_ok\n\r" \
45 "addq %3,%1 ; sbbq %0,%0 ; cmpq %1,%4 ; sbbq $0,%0" \
46 : "=&r" (flag), "=r" (roksum) \
47 : "1" (addr), "g" ((long)(size)), \
48 "g" (current_thread_info()->addr_limit.seg)); \
49 flag; \
50})
51
52#define access_ok(type, addr, size) (__range_not_ok(addr, size) == 0)
53
54/*
55 * The exception table consists of pairs of addresses: the first is the
56 * address of an instruction that is allowed to fault, and the second is
57 * the address at which the program should continue. No registers are
58 * modified, so it is entirely up to the continuation code to figure out
59 * what to do.
60 *
61 * All the routines below use bits of fixup code that are out of line
62 * with the main instruction path. This means when everything is well,
63 * we don't even have to jump over them. Further, they do not intrude
64 * on our cache or tlb entries.
65 */
66
67struct exception_table_entry {
68 unsigned long insn, fixup;
69};
70
71extern int fixup_exception(struct pt_regs *regs);
72
73#define ARCH_HAS_SEARCH_EXTABLE
74
75/*
76 * These are the main single-value transfer routines. They automatically
77 * use the right size if we just have the right pointer type.
78 *
79 * This gets kind of ugly. We want to return _two_ values in "get_user()"
80 * and yet we don't want to do any pointers, because that is too much
81 * of a performance impact. Thus we have a few rather ugly macros here,
82 * and hide all the ugliness from the user.
83 *
84 * The "__xxx" versions of the user access functions are versions that
85 * do not verify the address space, that must have been done previously
86 * with a separate "access_ok()" call (this is used when we do multiple
87 * accesses to the same area of user memory).
88 */
89
90#define __get_user_x(size, ret, x, ptr) \
91 asm volatile("call __get_user_" #size \
92 : "=a" (ret),"=d" (x) \
93 : "c" (ptr) \
94 : "r8")
95
96/* Careful: we have to cast the result to the type of the pointer
97 * for sign reasons */
98
99#define get_user(x, ptr) \
100({ \
101 unsigned long __val_gu; \
102 int __ret_gu; \
103 __chk_user_ptr(ptr); \
104 switch (sizeof(*(ptr))) { \
105 case 1: \
106 __get_user_x(1, __ret_gu, __val_gu, ptr); \
107 break; \
108 case 2: \
109 __get_user_x(2, __ret_gu, __val_gu, ptr); \
110 break; \
111 case 4: \
112 __get_user_x(4, __ret_gu, __val_gu, ptr); \
113 break; \
114 case 8: \
115 __get_user_x(8, __ret_gu, __val_gu, ptr); \
116 break; \
117 default: \
118 __get_user_bad(); \
119 break; \
120 } \
121 (x) = (__force typeof(*(ptr)))__val_gu; \
122 __ret_gu; \
123})
124
125extern void __put_user_1(void);
126extern void __put_user_2(void);
127extern void __put_user_4(void);
128extern void __put_user_8(void);
129extern void __put_user_bad(void);
130
131#define __put_user_x(size, ret, x, ptr) \
132 asm volatile("call __put_user_" #size \
133 :"=a" (ret) \
134 :"c" (ptr),"d" (x) \
135 :"r8")
136
137#define put_user(x, ptr) \
138 __put_user_check((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)))
139
140#define __get_user(x, ptr) \
141 __get_user_nocheck((x), (ptr), sizeof(*(ptr)))
142#define __put_user(x, ptr) \
143 __put_user_nocheck((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)))
144
145#define __get_user_unaligned __get_user
146#define __put_user_unaligned __put_user
147
148#define __put_user_nocheck(x, ptr, size) \
149({ \
150 int __pu_err; \
151 __put_user_size((x), (ptr), (size), __pu_err); \
152 __pu_err; \
153})
154
155
156#define __put_user_check(x, ptr, size) \
157({ \
158 int __pu_err; \
159 typeof(*(ptr)) __user *__pu_addr = (ptr); \
160 switch (size) { \
161 case 1: \
162 __put_user_x(1, __pu_err, x, __pu_addr); \
163 break; \
164 case 2: \
165 __put_user_x(2, __pu_err, x, __pu_addr); \
166 break; \
167 case 4: \
168 __put_user_x(4, __pu_err, x, __pu_addr); \
169 break; \
170 case 8: \
171 __put_user_x(8, __pu_err, x, __pu_addr); \
172 break; \
173 default: \
174 __put_user_bad(); \
175 } \
176 __pu_err; \
177})
178
179#define __put_user_size(x, ptr, size, retval) \
180do { \
181 retval = 0; \
182 __chk_user_ptr(ptr); \
183 switch (size) { \
184 case 1: \
185 __put_user_asm(x, ptr, retval, "b", "b", "iq", -EFAULT);\
186 break; \
187 case 2: \
188 __put_user_asm(x, ptr, retval, "w", "w", "ir", -EFAULT);\
189 break; \
190 case 4: \
191 __put_user_asm(x, ptr, retval, "l", "k", "ir", -EFAULT);\
192 break; \
193 case 8: \
194 __put_user_asm(x, ptr, retval, "q", "", "Zr", -EFAULT); \
195 break; \
196 default: \
197 __put_user_bad(); \
198 } \
199} while (0)
200
201/* FIXME: this hack is definitely wrong -AK */
202struct __large_struct { unsigned long buf[100]; };
203#define __m(x) (*(struct __large_struct __user *)(x))
204
205/*
206 * Tell gcc we read from memory instead of writing: this is because
207 * we do not write to any memory gcc knows about, so there are no
208 * aliasing issues.
209 */
210#define __put_user_asm(x, addr, err, itype, rtype, ltype, errno) \
211 asm volatile("1: mov"itype" %"rtype"1,%2\n" \
212 "2:\n" \
213 ".section .fixup, \"ax\"\n" \
214 "3: mov %3,%0\n" \
215 " jmp 2b\n" \
216 ".previous\n" \
217 _ASM_EXTABLE(1b, 3b) \
218 : "=r"(err) \
219 : ltype (x), "m" (__m(addr)), "i" (errno), "0" (err))
220
221
222#define __get_user_nocheck(x, ptr, size) \
223({ \
224 int __gu_err; \
225 unsigned long __gu_val; \
226 __get_user_size(__gu_val, (ptr), (size), __gu_err); \
227 (x) = (__force typeof(*(ptr)))__gu_val; \
228 __gu_err; \
229})
230
231extern int __get_user_1(void);
232extern int __get_user_2(void);
233extern int __get_user_4(void);
234extern int __get_user_8(void);
235extern int __get_user_bad(void);
236
237#define __get_user_size(x, ptr, size, retval) \
238do { \
239 retval = 0; \
240 __chk_user_ptr(ptr); \
241 switch (size) { \
242 case 1: \
243 __get_user_asm(x, ptr, retval, "b", "b", "=q", -EFAULT);\
244 break; \
245 case 2: \
246 __get_user_asm(x, ptr, retval, "w", "w", "=r", -EFAULT);\
247 break; \
248 case 4: \
249 __get_user_asm(x, ptr, retval, "l", "k", "=r", -EFAULT);\
250 break; \
251 case 8: \
252 __get_user_asm(x, ptr, retval, "q", "", "=r", -EFAULT); \
253 break; \
254 default: \
255 (x) = __get_user_bad(); \
256 } \
257} while (0)
258
259#define __get_user_asm(x, addr, err, itype, rtype, ltype, errno) \
260 asm volatile("1: mov"itype" %2,%"rtype"1\n" \
261 "2:\n" \
262 ".section .fixup, \"ax\"\n" \
263 "3: mov %3,%0\n" \
264 " xor"itype" %"rtype"1,%"rtype"1\n" \
265 " jmp 2b\n" \
266 ".previous\n" \
267 _ASM_EXTABLE(1b, 3b) \
268 : "=r" (err), ltype (x) \
269 : "m" (__m(addr)), "i"(errno), "0"(err))
270
271/* 12/*
272 * Copy To/From Userspace 13 * Copy To/From Userspace
273 */ 14 */
@@ -437,7 +178,6 @@ __copy_to_user_inatomic(void __user *dst, const void *src, unsigned size)
437 return copy_user_generic((__force void *)dst, src, size); 178 return copy_user_generic((__force void *)dst, src, size);
438} 179}
439 180
440#define ARCH_HAS_NOCACHE_UACCESS 1
441extern long __copy_user_nocache(void *dst, const void __user *src, 181extern long __copy_user_nocache(void *dst, const void __user *src,
442 unsigned size, int zerorest); 182 unsigned size, int zerorest);
443 183
@@ -455,4 +195,7 @@ static inline int __copy_from_user_inatomic_nocache(void *dst,
455 return __copy_user_nocache(dst, src, size, 0); 195 return __copy_user_nocache(dst, src, size, 0);
456} 196}
457 197
198unsigned long
199copy_user_handle_tail(char *to, char *from, unsigned len, unsigned zerorest);
200
458#endif /* __X86_64_UACCESS_H */ 201#endif /* __X86_64_UACCESS_H */
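
Both uaccess headers describe the exception table as (insn, fixup) address pairs consumed when a tagged instruction faults. Conceptually, the page-fault path does something like the following (simplified sketch; the real fixup_exception() lives under arch/x86/mm/):

        int fixup_exception(struct pt_regs *regs)
        {
                const struct exception_table_entry *fixup;

                fixup = search_exception_tables(regs->ip);
                if (!fixup)
                        return 0;        /* no entry: genuine kernel fault */
                regs->ip = fixup->fixup; /* resume at the out-of-line stub */
                return 1;
        }
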
diff --git a/include/asm-x86/unistd_64.h b/include/asm-x86/unistd_64.h
index fe26e36d0f51..9c1a4a3470d9 100644
--- a/include/asm-x86/unistd_64.h
+++ b/include/asm-x86/unistd_64.h
@@ -290,7 +290,7 @@ __SYSCALL(__NR_rt_sigtimedwait, sys_rt_sigtimedwait)
290#define __NR_rt_sigqueueinfo 129 290#define __NR_rt_sigqueueinfo 129
291__SYSCALL(__NR_rt_sigqueueinfo, sys_rt_sigqueueinfo) 291__SYSCALL(__NR_rt_sigqueueinfo, sys_rt_sigqueueinfo)
292#define __NR_rt_sigsuspend 130 292#define __NR_rt_sigsuspend 130
293__SYSCALL(__NR_rt_sigsuspend, stub_rt_sigsuspend) 293__SYSCALL(__NR_rt_sigsuspend, sys_rt_sigsuspend)
294#define __NR_sigaltstack 131 294#define __NR_sigaltstack 131
295__SYSCALL(__NR_sigaltstack, stub_sigaltstack) 295__SYSCALL(__NR_sigaltstack, stub_sigaltstack)
296#define __NR_utime 132 296#define __NR_utime 132
diff --git a/include/asm-x86/uv/uv_bau.h b/include/asm-x86/uv/uv_bau.h
new file mode 100644
index 000000000000..91ac0dfb7588
--- /dev/null
+++ b/include/asm-x86/uv/uv_bau.h
@@ -0,0 +1,337 @@
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * SGI UV Broadcast Assist Unit definitions
7 *
8 * Copyright (C) 2008 Silicon Graphics, Inc. All rights reserved.
9 */
10
11#ifndef __ASM_X86_UV_BAU__
12#define __ASM_X86_UV_BAU__
13
14#include <linux/bitmap.h>
15#define BITSPERBYTE 8
16
17/*
18 * Broadcast Assist Unit messaging structures
19 *
20 * Selective Broadcast activations are induced by software action
21 * specifying a particular 8-descriptor "set" via a 6-bit index written
22 * to an MMR.
23 * Thus there are 64 unique 512-byte sets of SB descriptors - one set for
24 * each 6-bit index value. These descriptor sets are mapped in sequence
25 * starting with set 0 located at the address specified in the
26 * BAU_SB_DESCRIPTOR_BASE register, set 1 is located at BASE + 512,
27 * set 2 is at BASE + 2*512, set 3 at BASE + 3*512, and so on.
28 *
29 * We will use 31 sets, one for sending BAU messages from each of the 32
30 * cpu's on the node.
31 *
32 * TLB shootdown will use the first of the 8 descriptors of each set.
33 * Each of the descriptors is 64 bytes in size (8*64 = 512 bytes in a set).
34 */
35
36#define UV_ITEMS_PER_DESCRIPTOR 8
37#define UV_CPUS_PER_ACT_STATUS 32
38#define UV_ACT_STATUS_MASK 0x3
39#define UV_ACT_STATUS_SIZE 2
40#define UV_ACTIVATION_DESCRIPTOR_SIZE 32
41#define UV_DISTRIBUTION_SIZE 256
42#define UV_SW_ACK_NPENDING 8
43#define UV_BAU_MESSAGE 200
44/*
45 * Messaging irq; see irq_64.h and include/asm-x86/hw_irq_64.h
46 * To be dynamically allocated in the future
47 */
48#define UV_NET_ENDPOINT_INTD 0x38
49#define UV_DESC_BASE_PNODE_SHIFT 49
50#define UV_PAYLOADQ_PNODE_SHIFT 49
51#define UV_PTC_BASENAME "sgi_uv/ptc_statistics"
52#define uv_physnodeaddr(x) ((__pa((unsigned long)(x)) & uv_mmask))
53
54/*
55 * bits in UVH_LB_BAU_SB_ACTIVATION_STATUS_0/1
56 */
57#define DESC_STATUS_IDLE 0
58#define DESC_STATUS_ACTIVE 1
59#define DESC_STATUS_DESTINATION_TIMEOUT 2
60#define DESC_STATUS_SOURCE_TIMEOUT 3
61
62/*
63 * source side thresholds at which message retries print a warning
64 */
65#define SOURCE_TIMEOUT_LIMIT 20
66#define DESTINATION_TIMEOUT_LIMIT 20
67
68/*
69 * number of entries in the destination side payload queue
70 */
71#define DEST_Q_SIZE 17
72/*
73 * number of destination side software ack resources
74 */
75#define DEST_NUM_RESOURCES 8
76#define MAX_CPUS_PER_NODE 32
77/*
78 * completion statuses for sending a TLB flush message
79 */
80#define FLUSH_RETRY 1
81#define FLUSH_GIVEUP 2
82#define FLUSH_COMPLETE 3
83
84/*
85 * Distribution: 32 bytes (256 bits) (bytes 0-0x1f of descriptor)
86 * If the 'multilevel' flag in the header portion of the descriptor
87 * has been set to 0, then endpoint multi-unicast mode is selected.
88 * The distribution specification (32 bytes) is interpreted as a 256-bit
89 * distribution vector. Adjacent bits correspond to consecutive even numbered
90 * nodeIDs. The result of adding the index of a given bit to the 15-bit
91 * 'base_dest_nodeid' field of the header corresponds to the
92 * destination nodeID associated with that specified bit.
93 */
94struct bau_target_nodemask {
95 unsigned long bits[BITS_TO_LONGS(256)];
96};
97
98/*
99 * mask of cpu's on a node
100 * (during initialization we need to check that unsigned long has
101 * enough bits for max. cpu's per node)
102 */
103struct bau_local_cpumask {
104 unsigned long bits;
105};
106
107/*
108 * Payload: 16 bytes (128 bits) (bytes 0x20-0x2f of descriptor)
109 * only 12 bytes (96 bits) of the payload area are usable.
110 * An additional 3 bytes (bits 27:4) of the header address are carried
111 * to the next bytes of the destination payload queue.
112 * And an additional 2 bytes of the header Suppl_A field are also
113 * carried to the destination payload queue.
114 * But the first byte of the Suppl_A becomes bits 127:120 (the 16th byte)
115 * of the destination payload queue, which is written by the hardware
116 * with the s/w ack resource bit vector.
117 * [ effective message contents (16 bytes (128 bits) maximum), not counting
118 * the s/w ack bit vector ]
119 */
120
121/*
122 * The payload is software-defined for INTD transactions
123 */
124struct bau_msg_payload {
125 unsigned long address; /* signifies a page or all TLB's
126 of the cpu */
127 /* 64 bits */
128 unsigned short sending_cpu; /* filled in by sender */
129 /* 16 bits */
130 unsigned short acknowledge_count;/* filled in by destination */
131 /* 16 bits */
132 unsigned int reserved1:32; /* not usable */
133};
134
135
136/*
137 * Message header: 16 bytes (128 bits) (bytes 0x30-0x3f of descriptor)
138 * see table 4.2.3.0.1 in broadcast_assist spec.
139 */
140struct bau_msg_header {
141 int dest_subnodeid:6; /* must be zero */
142 /* bits 5:0 */
143 int base_dest_nodeid:15; /* nasid>>1 (pnode) of first bit in node_map */
144 /* bits 20:6 */
145 int command:8; /* message type */
146 /* bits 28:21 */
147 /* 0x38: SN3net EndPoint Message */
148 int rsvd_1:3; /* must be zero */
149 /* bits 31:29 */
150 /* int will align on 32 bits */
151 int rsvd_2:9; /* must be zero */
152 /* bits 40:32 */
153 /* Suppl_A is 56-41 */
154 int payload_2a:8; /* becomes byte 16 of msg */
155 /* bits 48:41 */ /* not currently using */
156 int payload_2b:8; /* becomes byte 17 of msg */
157 /* bits 56:49 */ /* not currently using */
158 /* Address field (96:57) is never used as an
159 address (these are address bits 42:3) */
160 int rsvd_3:1; /* must be zero */
161 /* bit 57 */
162 /* address bits 27:4 are payload */
163 /* these 24 bits become bytes 12-14 of msg */
164 int replied_to:1; /* sent as 0 by the source to byte 12 */
165 /* bit 58 */
166
167 int payload_1a:5; /* not currently used */
168 /* bits 63:59 */
169 int payload_1b:8; /* not currently used */
170 /* bits 71:64 */
171 int payload_1c:8; /* not currently used */
172 /* bits 79:72 */
173 int payload_1d:2; /* not currently used */
174 /* bits 81:80 */
175
176 int rsvd_4:7; /* must be zero */
177 /* bits 88:82 */
178 int sw_ack_flag:1; /* software acknowledge flag */
179 /* bit 89 */
180 /* INTD transactions at destination are to
181 wait for software acknowledge */
182 int rsvd_5:6; /* must be zero */
183 /* bits 95:90 */
184 int rsvd_6:5; /* must be zero */
185 /* bits 100:96 */
186 int int_both:1; /* if 1, interrupt both sockets on the blade */
187 /* bit 101*/
188 int fairness:3; /* usually zero */
189 /* bits 104:102 */
190 int multilevel:1; /* multi-level multicast format */
191 /* bit 105 */
192 /* 0 for TLB: endpoint multi-unicast messages */
193 int chaining:1; /* next descriptor is part of this activation*/
194 /* bit 106 */
195 int rsvd_7:21; /* must be zero */
196 /* bits 127:107 */
197};
198
199/*
200 * The activation descriptor:
201 * The format of the message to send, plus all accompanying control
202 * Should be 64 bytes
203 */
204struct bau_desc {
205 struct bau_target_nodemask distribution;
206 /*
207 * message template, consisting of header and payload:
208 */
209 struct bau_msg_header header;
210 struct bau_msg_payload payload;
211};
212/*
213 * -payload-- ---------header------
214 * bytes 0-11 bits 41-56 bits 58-81
215 * A B (2) C (3)
216 *
217 * A/B/C are moved to:
218 * A C B
219 * bytes 0-11 bytes 12-14 bytes 16-17 (byte 15 filled in by hw as vector)
220 * ------------payload queue-----------
221 */
222
223/*
224 * The payload queue on the destination side is an array of these.
225 * With BAU_MISC_CONTROL set for software acknowledge mode, the messages
226 * are 32 bytes (2 micropackets) (256 bits) in length, but contain only 17
227 * bytes of usable data, including the sw ack vector in byte 15 (bits 127:120)
228 * (12 bytes come from bau_msg_payload, 3 from payload_1, 2 from
229 * sw_ack_vector and payload_2)
230 * "Enabling Software Acknowledgment mode (see Section 4.3.3 Software
231 * Acknowledge Processing) also selects 32 byte (17 bytes usable) payload
232 * operation."
233 */
234struct bau_payload_queue_entry {
235 unsigned long address; /* signifies a page or all TLB's
236 of the cpu */
237 /* 64 bits, bytes 0-7 */
238
239 unsigned short sending_cpu; /* cpu that sent the message */
240 /* 16 bits, bytes 8-9 */
241
242 unsigned short acknowledge_count; /* filled in by destination */
243 /* 16 bits, bytes 10-11 */
244
245 unsigned short replied_to:1; /* sent as 0 by the source */
246 /* 1 bit */
247 unsigned short unused1:7; /* not currently using */
248 /* 7 bits: byte 12 */
249
250 unsigned char unused2[2]; /* not currently using */
251 /* bytes 13-14 */
252
253 unsigned char sw_ack_vector; /* filled in by the hardware */
254 /* byte 15 (bits 127:120) */
255
256 unsigned char unused4[3]; /* not currently using bytes 17-19 */
257 /* bytes 17-19 */
258
259 int number_of_cpus; /* filled in at destination */
260 /* 32 bits, bytes 20-23 (aligned) */
261
262 unsigned char unused5[8]; /* not using */
263 /* bytes 24-31 */
264};
265
266/*
267 * one for every slot in the destination payload queue
268 */
269struct bau_msg_status {
270 struct bau_local_cpumask seen_by; /* map of cpu's */
271};
272
273/*
274 * one for every slot in the destination software ack resources
275 */
276struct bau_sw_ack_status {
277 struct bau_payload_queue_entry *msg; /* associated message */
278 int watcher; /* cpu monitoring, or -1 */
279};
280
281/*
282 * one on every node and per-cpu; to locate the software tables
283 */
284struct bau_control {
285 struct bau_desc *descriptor_base;
286 struct bau_payload_queue_entry *bau_msg_head;
287 struct bau_payload_queue_entry *va_queue_first;
288 struct bau_payload_queue_entry *va_queue_last;
289 struct bau_msg_status *msg_statuses;
290 int *watching; /* pointer to array */
291};
292
293/*
294 * This structure is allocated per_cpu for UV TLB shootdown statistics.
295 */
296struct ptc_stats {
297 unsigned long ptc_i; /* number of IPI-style flushes */
298 unsigned long requestor; /* number of nodes this cpu sent to */
299 unsigned long requestee; /* times cpu was remotely requested */
300 unsigned long alltlb; /* times all tlb's on this cpu were flushed */
301 unsigned long onetlb; /* times just one tlb on this cpu was flushed */
302 unsigned long s_retry; /* retries on source side timeouts */
303 unsigned long d_retry; /* retries on destination side timeouts */
304 unsigned long sflush; /* cycles spent in uv_flush_tlb_others */
305 unsigned long dflush; /* cycles spent on destination side */
306 unsigned long retriesok; /* successes on retries */
307 unsigned long nomsg; /* interrupts with no message */
308 unsigned long multmsg; /* interrupts with multiple messages */
309 unsigned long ntargeted;/* nodes targeted */
310};
311
312static inline int bau_node_isset(int node, struct bau_target_nodemask *dstp)
313{
314 return constant_test_bit(node, &dstp->bits[0]);
315}
316static inline void bau_node_set(int node, struct bau_target_nodemask *dstp)
317{
318 __set_bit(node, &dstp->bits[0]);
319}
320static inline void bau_nodes_clear(struct bau_target_nodemask *dstp, int nbits)
321{
322 bitmap_zero(&dstp->bits[0], nbits);
323}
324
325static inline void bau_cpubits_clear(struct bau_local_cpumask *dstp, int nbits)
326{
327 bitmap_zero(&dstp->bits, nbits);
328}
329
330#define cpubit_isset(cpu, bau_local_cpumask) \
331 test_bit((cpu), (bau_local_cpumask).bits)
332
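The inlines above are thin wrappers over the generic kernel bitmap API. A kernel-context sketch of building a distribution target set with them; struct bau_target_nodemask and UV_DISTRIBUTION_SIZE are assumed to come from earlier in this header:

    /* sketch: mark nodes 3 and 7 as BAU broadcast targets.
     * Assumes bau_target_nodemask/UV_DISTRIBUTION_SIZE from earlier
     * in this header. */
    static void example_build_target_set(struct bau_target_nodemask *dstp)
    {
        bau_nodes_clear(dstp, UV_DISTRIBUTION_SIZE); /* start with no targets */
        bau_node_set(3, dstp);
        bau_node_set(7, dstp);
        BUG_ON(!bau_node_isset(3, dstp));            /* node 3 is now set */
    }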
333extern int uv_flush_tlb_others(cpumask_t *, struct mm_struct *, unsigned long);
334extern void uv_bau_message_intr1(void);
335extern void uv_bau_timeout_intr1(void);
336
337#endif /* __ASM_X86_UV_BAU__ */
diff --git a/include/asm-x86/uv/uv_hub.h b/include/asm-x86/uv/uv_hub.h
index 26b9240d1e23..a4ef26e5850b 100644
--- a/include/asm-x86/uv/uv_hub.h
+++ b/include/asm-x86/uv/uv_hub.h
@@ -5,7 +5,7 @@
5 * 5 *
6 * SGI UV architectural definitions 6 * SGI UV architectural definitions
7 * 7 *
8 * Copyright (C) 2007 Silicon Graphics, Inc. All rights reserved. 8 * Copyright (C) 2007-2008 Silicon Graphics, Inc. All rights reserved.
9 */ 9 */
10 10
11#ifndef __ASM_X86_UV_HUB_H__ 11#ifndef __ASM_X86_UV_HUB_H__
@@ -20,26 +20,49 @@
20/* 20/*
21 * Addressing Terminology 21 * Addressing Terminology
22 * 22 *
23 * NASID - network ID of a router, Mbrick or Cbrick. Nasid values of 23 * M - The low M bits of a physical address represent the offset
24 * routers always have low bit of 1, C/MBricks have low bit 24 * into the blade local memory. RAM memory on a blade is physically
25 * equal to 0. Most addressing macros that target UV hub chips 25 * contiguous (although various IO spaces may punch holes in
26 * right shift the NASID by 1 to exclude the always-zero bit. 26 * it).
27 * 27 *
28 * SNASID - NASID right shifted by 1 bit. 28 * N - Number of bits in the node portion of a socket physical
29 * address.
30 *
31 * NASID - network ID of a router, Mbrick or Cbrick. Nasid values of
32 * routers always have low bit of 1, C/MBricks have low bit
33 * equal to 0. Most addressing macros that target UV hub chips
34 * right shift the NASID by 1 to exclude the always-zero bit.
35 * NASIDs contain up to 15 bits.
36 *
37 * GNODE - NASID right shifted by 1 bit. Most mmrs contain gnodes instead
38 * of nasids.
39 *
40 * PNODE - the low N bits of the GNODE. The PNODE is the most useful variant
41 * of the nasid for socket usage.
42 *
43 *
44 * NumaLink Global Physical Address Format:
45 * +--------------------------------+---------------------+
46 * |00..000| GNODE | NodeOffset |
47 * +--------------------------------+---------------------+
48 * |<-------53 - M bits --->|<--------M bits ----->
49 *
50 * M - number of node offset bits (35 .. 40)
29 * 51 *
30 * 52 *
31 * Memory/UV-HUB Processor Socket Address Format: 53 * Memory/UV-HUB Processor Socket Address Format:
32 * +--------+---------------+---------------------+ 54 * +----------------+---------------+---------------------+
33 * |00..0000| SNASID | NodeOffset | 55 * |00..000000000000| PNODE | NodeOffset |
34 * +--------+---------------+---------------------+ 56 * +----------------+---------------+---------------------+
35 * <--- N bits --->|<--------M bits -----> 57 * <--- N bits --->|<--------M bits ----->
36 * 58 *
37 * M number of node offset bits (35 .. 40) 59 * M - number of node offset bits (35 .. 40)
38 * N number of SNASID bits (0 .. 10) 60 * N - number of PNODE bits (0 .. 10)
39 * 61 *
40 * Note: M + N cannot currently exceed 44 (x86_64) or 46 (IA64). 62 * Note: M + N cannot currently exceed 44 (x86_64) or 46 (IA64).
41 * The actual values are configuration dependent and are set at 63 * The actual values are configuration dependent and are set at
42 * boot time 64 * boot time. M & N values are set by the hardware/BIOS at boot.
65 *
43 * 66 *
44 * APICID format 67 * APICID format
45 * NOTE!!!!!! This is the current format of the APICID. However, code 68 * NOTE!!!!!! This is the current format of the APICID. However, code
@@ -48,14 +71,14 @@
48 * 71 *
49 * 1111110000000000 72 * 1111110000000000
50 * 5432109876543210 73 * 5432109876543210
51 * nnnnnnnnnnlc0cch 74 * pppppppppplc0cch
52 * sssssssssss 75 * sssssssssss
53 * 76 *
54 * n = snasid bits 77 * p = pnode bits
55 * l = socket number on board 78 * l = socket number on board
56 * c = core 79 * c = core
57 * h = hyperthread 80 * h = hyperthread
58 * s = bits that are in the socket CSR 81 * s = bits that are in the SOCKET_ID CSR
59 * 82 *
60 * Note: Processor only supports 12 bits in the APICID register. The ACPI 83 * Note: Processor only supports 12 bits in the APICID register. The ACPI
61 * tables hold all 16 bits. Software needs to be aware of this. 84 * tables hold all 16 bits. Software needs to be aware of this.
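The NASID/GNODE/PNODE relationships described above reduce to one shift and one mask. A standalone sketch of the arithmetic; the mask value (N = 6) and the nasid are hypothetical examples, as the real values are set by the hardware/BIOS at boot:

    /* sketch: NASID -> GNODE -> PNODE, per the terminology above.
     * The nasid and pnode mask are hypothetical example values. */
    #include <stdio.h>

    int main(void)
    {
        unsigned long nasid = 0x24;          /* C/MBrick: low bit is 0 */
        unsigned long gnode = nasid >> 1;    /* drop the always-zero bit */
        unsigned long pnode = gnode & 0x3f;  /* low N bits of the gnode */

        printf("nasid=%#lx gnode=%#lx pnode=%#lx\n", nasid, gnode, pnode);
        return 0;
    }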
@@ -74,7 +97,7 @@
74 * This value is also the value of the maximum number of non-router NASIDs 97 * This value is also the value of the maximum number of non-router NASIDs
75 * in the numalink fabric. 98 * in the numalink fabric.
76 * 99 *
77 * NOTE: a brick may be 1 or 2 OS nodes. Don't get these confused. 100 * NOTE: a brick may contain 1 or 2 OS nodes. Don't get these confused.
78 */ 101 */
79#define UV_MAX_NUMALINK_BLADES 16384 102#define UV_MAX_NUMALINK_BLADES 16384
80 103
@@ -96,8 +119,12 @@
96 */ 119 */
97struct uv_hub_info_s { 120struct uv_hub_info_s {
98 unsigned long global_mmr_base; 121 unsigned long global_mmr_base;
99 unsigned short local_nasid; 122 unsigned long gpa_mask;
100 unsigned short gnode_upper; 123 unsigned long gnode_upper;
124 unsigned long lowmem_remap_top;
125 unsigned long lowmem_remap_base;
126 unsigned short pnode;
127 unsigned short pnode_mask;
101 unsigned short coherency_domain_number; 128 unsigned short coherency_domain_number;
102 unsigned short numa_blade_id; 129 unsigned short numa_blade_id;
103 unsigned char blade_processor_id; 130 unsigned char blade_processor_id;
@@ -112,83 +139,126 @@ DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info);
112 * Local & Global MMR space macros. 139 * Local & Global MMR space macros.
113 * Note: macros are intended to be used ONLY by inline functions 140 * Note: macros are intended to be used ONLY by inline functions
114 * in this file - not by other kernel code. 141 * in this file - not by other kernel code.
142 * n - NASID (full 15-bit global nasid)
143 * g - GNODE (full 15-bit global nasid, right shifted 1)
144 * p - PNODE (local part of nasids, right shifted 1)
115 */ 145 */
116#define UV_SNASID(n) ((n) >> 1) 146#define UV_NASID_TO_PNODE(n) (((n) >> 1) & uv_hub_info->pnode_mask)
117#define UV_NASID(n) ((n) << 1) 147#define UV_PNODE_TO_NASID(p) (((p) << 1) | uv_hub_info->gnode_upper)
118 148
119#define UV_LOCAL_MMR_BASE 0xf4000000UL 149#define UV_LOCAL_MMR_BASE 0xf4000000UL
120#define UV_GLOBAL_MMR32_BASE 0xf8000000UL 150#define UV_GLOBAL_MMR32_BASE 0xf8000000UL
121#define UV_GLOBAL_MMR64_BASE (uv_hub_info->global_mmr_base) 151#define UV_GLOBAL_MMR64_BASE (uv_hub_info->global_mmr_base)
152#define UV_LOCAL_MMR_SIZE (64UL * 1024 * 1024)
153#define UV_GLOBAL_MMR32_SIZE (64UL * 1024 * 1024)
122 154
123#define UV_GLOBAL_MMR32_SNASID_MASK 0x3ff 155#define UV_GLOBAL_MMR32_PNODE_SHIFT 15
124#define UV_GLOBAL_MMR32_SNASID_SHIFT 15 156#define UV_GLOBAL_MMR64_PNODE_SHIFT 26
125#define UV_GLOBAL_MMR64_SNASID_SHIFT 26
126 157
127#define UV_GLOBAL_MMR32_NASID_BITS(n) \ 158#define UV_GLOBAL_MMR32_PNODE_BITS(p) ((p) << (UV_GLOBAL_MMR32_PNODE_SHIFT))
128 (((UV_SNASID(n) & UV_GLOBAL_MMR32_SNASID_MASK)) << \
129 (UV_GLOBAL_MMR32_SNASID_SHIFT))
130 159
131#define UV_GLOBAL_MMR64_NASID_BITS(n) \ 160#define UV_GLOBAL_MMR64_PNODE_BITS(p) \
132 ((unsigned long)UV_SNASID(n) << UV_GLOBAL_MMR64_SNASID_SHIFT) 161 ((unsigned long)(p) << UV_GLOBAL_MMR64_PNODE_SHIFT)
162
163#define UV_APIC_PNODE_SHIFT 6
164
165/*
166 * Macros for converting between kernel virtual addresses, socket local physical
167 * addresses, and UV global physical addresses.
168 * Note: use the standard __pa() & __va() macros for converting
169 * between socket virtual and socket physical addresses.
170 */
171
172/* socket phys RAM --> UV global physical address */
173static inline unsigned long uv_soc_phys_ram_to_gpa(unsigned long paddr)
174{
175 if (paddr < uv_hub_info->lowmem_remap_top)
176 paddr += uv_hub_info->lowmem_remap_base;
177 return paddr | uv_hub_info->gnode_upper;
178}
179
180
181/* socket virtual --> UV global physical address */
182static inline unsigned long uv_gpa(void *v)
183{
184 return __pa(v) | uv_hub_info->gnode_upper;
185}
186
187/* socket virtual --> UV global physical address, returned as a pointer */
188static inline void *uv_vgpa(void *v)
189{
190 return (void *)uv_gpa(v);
191}
192
193/* UV global physical address --> socket virtual */
194static inline void *uv_va(unsigned long gpa)
195{
196 return __va(gpa & uv_hub_info->gpa_mask);
197}
198
199/* pnode, offset --> socket virtual */
200static inline void *uv_pnode_offset_to_vaddr(int pnode, unsigned long offset)
201{
202 return __va(((unsigned long)pnode << uv_hub_info->m_val) | offset);
203}
133 204
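A standalone sketch of the low-memory remap logic in uv_soc_phys_ram_to_gpa() above; all three configuration values are hypothetical examples, since the real window and gnode_upper are read from hub MMRs at boot:

    /* sketch: socket physical RAM address -> UV global physical address.
     * The three configuration values below are hypothetical examples. */
    #include <stdio.h>

    static const unsigned long lowmem_remap_top  = 0x40000000UL;  /* 1 GB */
    static const unsigned long lowmem_remap_base = 0x800000000UL;
    static const unsigned long gnode_upper       = 0x1000000000UL;

    static unsigned long soc_phys_ram_to_gpa(unsigned long paddr)
    {
        if (paddr < lowmem_remap_top)    /* low RAM is remapped upward */
            paddr += lowmem_remap_base;
        return paddr | gnode_upper;      /* tag with this blade's gnode */
    }

    int main(void)
    {
        printf("gpa of 0x1000 = %#lx\n", soc_phys_ram_to_gpa(0x1000));
        return 0;
    }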
134#define UV_APIC_NASID_SHIFT 6
135 205
136/* 206/*
137 * Extract a NASID from an APICID (full apicid, not processor subset) 207 * Extract a PNODE from an APICID (full apicid, not processor subset)
138 */ 208 */
139static inline int uv_apicid_to_nasid(int apicid) 209static inline int uv_apicid_to_pnode(int apicid)
140{ 210{
141 return (UV_NASID(apicid >> UV_APIC_NASID_SHIFT)); 211 return (apicid >> UV_APIC_PNODE_SHIFT);
142} 212}
143 213
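Per the APICID format documented in the header comment, the pnode sits above the socket/core/hyperthread fields. A standalone sketch of the extraction; the apicid value is hypothetical:

    /* sketch: extract the PNODE from a full APICID, as in
     * uv_apicid_to_pnode() above. The apicid value is hypothetical. */
    #include <stdio.h>

    #define UV_APIC_PNODE_SHIFT 6

    int main(void)
    {
        int apicid = 0x1c5;
        int pnode  = apicid >> UV_APIC_PNODE_SHIFT; /* skip l/c/h bits */

        printf("apicid %#x -> pnode %#x\n", apicid, pnode);
        return 0;
    }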
144/* 214/*
145 * Access global MMRs using the low memory MMR32 space. This region supports 215 * Access global MMRs using the low memory MMR32 space. This region supports
146 * faster MMR access but not all MMRs are accessible in this space. 216 * faster MMR access but not all MMRs are accessible in this space.
147 */ 217 */
148static inline unsigned long *uv_global_mmr32_address(int nasid, 218static inline unsigned long *uv_global_mmr32_address(int pnode,
149 unsigned long offset) 219 unsigned long offset)
150{ 220{
151 return __va(UV_GLOBAL_MMR32_BASE | 221 return __va(UV_GLOBAL_MMR32_BASE |
152 UV_GLOBAL_MMR32_NASID_BITS(nasid) | offset); 222 UV_GLOBAL_MMR32_PNODE_BITS(pnode) | offset);
153} 223}
154 224
155static inline void uv_write_global_mmr32(int nasid, unsigned long offset, 225static inline void uv_write_global_mmr32(int pnode, unsigned long offset,
156 unsigned long val) 226 unsigned long val)
157{ 227{
158 *uv_global_mmr32_address(nasid, offset) = val; 228 *uv_global_mmr32_address(pnode, offset) = val;
159} 229}
160 230
161static inline unsigned long uv_read_global_mmr32(int nasid, 231static inline unsigned long uv_read_global_mmr32(int pnode,
162 unsigned long offset) 232 unsigned long offset)
163{ 233{
164 return *uv_global_mmr32_address(nasid, offset); 234 return *uv_global_mmr32_address(pnode, offset);
165} 235}
166 236
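A kernel-context sketch of a read-modify-write through the fast MMR32 window using the accessors above; the offset and bit are hypothetical, and only MMRs actually mapped in the 32-bit window may be accessed this way:

    /* sketch: RMW a global MMR via the MMR32 window.
     * EXAMPLE_MMR_OFFSET and the enable bit are hypothetical. */
    #define EXAMPLE_MMR_OFFSET 0x61000UL

    static void example_mmr32_rmw(int pnode)
    {
        unsigned long v = uv_read_global_mmr32(pnode, EXAMPLE_MMR_OFFSET);

        v |= 1UL;                                 /* set a low enable bit */
        uv_write_global_mmr32(pnode, EXAMPLE_MMR_OFFSET, v);
    }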
167/* 237/*
168 * Access Global MMR space using the MMR space located at the top of physical 238 * Access Global MMR space using the MMR space located at the top of physical
169 * memory. 239 * memory.
170 */ 240 */
171static inline unsigned long *uv_global_mmr64_address(int nasid, 241static inline unsigned long *uv_global_mmr64_address(int pnode,
172 unsigned long offset) 242 unsigned long offset)
173{ 243{
174 return __va(UV_GLOBAL_MMR64_BASE | 244 return __va(UV_GLOBAL_MMR64_BASE |
175 UV_GLOBAL_MMR64_NASID_BITS(nasid) | offset); 245 UV_GLOBAL_MMR64_PNODE_BITS(pnode) | offset);
176} 246}
177 247
178static inline void uv_write_global_mmr64(int nasid, unsigned long offset, 248static inline void uv_write_global_mmr64(int pnode, unsigned long offset,
179 unsigned long val) 249 unsigned long val)
180{ 250{
181 *uv_global_mmr64_address(nasid, offset) = val; 251 *uv_global_mmr64_address(pnode, offset) = val;
182} 252}
183 253
184static inline unsigned long uv_read_global_mmr64(int nasid, 254static inline unsigned long uv_read_global_mmr64(int pnode,
185 unsigned long offset) 255 unsigned long offset)
186{ 256{
187 return *uv_global_mmr64_address(nasid, offset); 257 return *uv_global_mmr64_address(pnode, offset);
188} 258}
189 259
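The MMR64 space at the top of physical memory maps every global MMR. A kernel-context sketch reading a hub's node id register through it; UVH_NODE_ID is defined in uv_mmrs.h, later in this patch:

    /* sketch: read the NODE_ID MMR of a hub via MMR64 space.
     * UVH_NODE_ID comes from uv_mmrs.h (defined later in this patch). */
    static unsigned long example_read_node_id(int pnode)
    {
        return uv_read_global_mmr64(pnode, UVH_NODE_ID);
    }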
190/* 260/*
191 * Access node local MMRs. Faster than using global space but only local MMRs 261 * Access hub local MMRs. Faster than using global space but only local MMRs
192 * are accessible. 262 * are accessible.
193 */ 263 */
194static inline unsigned long *uv_local_mmr_address(unsigned long offset) 264static inline unsigned long *uv_local_mmr_address(unsigned long offset)
@@ -207,15 +277,15 @@ static inline void uv_write_local_mmr(unsigned long offset, unsigned long val)
207} 277}
208 278
209/* 279/*
210 * Structures and definitions for converting between cpu, node, and blade 280 * Structures and definitions for converting between cpu, node, pnode, and blade
211 * numbers. 281 * numbers.
212 */ 282 */
213struct uv_blade_info { 283struct uv_blade_info {
214 unsigned short nr_posible_cpus; 284 unsigned short nr_possible_cpus;
215 unsigned short nr_online_cpus; 285 unsigned short nr_online_cpus;
216 unsigned short nasid; 286 unsigned short pnode;
217}; 287};
218struct uv_blade_info *uv_blade_info; 288extern struct uv_blade_info *uv_blade_info;
219extern short *uv_node_to_blade; 289extern short *uv_node_to_blade;
220extern short *uv_cpu_to_blade; 290extern short *uv_cpu_to_blade;
221extern short uv_possible_blades; 291extern short uv_possible_blades;
@@ -244,16 +314,16 @@ static inline int uv_node_to_blade_id(int nid)
244 return uv_node_to_blade[nid]; 314 return uv_node_to_blade[nid];
245} 315}
246 316
247/* Convert a blade id to the NASID of the blade */ 317/* Convert a blade id to the PNODE of the blade */
248static inline int uv_blade_to_nasid(int bid) 318static inline int uv_blade_to_pnode(int bid)
249{ 319{
250 return uv_blade_info[bid].nasid; 320 return uv_blade_info[bid].pnode;
251} 321}
252 322
253/* Determine the number of possible cpus on a blade */ 323/* Determine the number of possible cpus on a blade */
254static inline int uv_blade_nr_possible_cpus(int bid) 324static inline int uv_blade_nr_possible_cpus(int bid)
255{ 325{
256 return uv_blade_info[bid].nr_posible_cpus; 326 return uv_blade_info[bid].nr_possible_cpus;
257} 327}
258 328
259/* Determine the number of online cpus on a blade */ 329/* Determine the number of online cpus on a blade */
@@ -262,16 +332,16 @@ static inline int uv_blade_nr_online_cpus(int bid)
262 return uv_blade_info[bid].nr_online_cpus; 332 return uv_blade_info[bid].nr_online_cpus;
263} 333}
264 334
265/* Convert a cpu id to the NASID of the blade containing the cpu */ 335/* Convert a cpu id to the PNODE of the blade containing the cpu */
266static inline int uv_cpu_to_nasid(int cpu) 336static inline int uv_cpu_to_pnode(int cpu)
267{ 337{
268 return uv_blade_info[uv_cpu_to_blade_id(cpu)].nasid; 338 return uv_blade_info[uv_cpu_to_blade_id(cpu)].pnode;
269} 339}
270 340
271/* Convert a node number to the NASID of the blade */ 341/* Convert a linux node number to the PNODE of the blade */
272static inline int uv_node_to_nasid(int nid) 342static inline int uv_node_to_pnode(int nid)
273{ 343{
274 return uv_blade_info[uv_node_to_blade_id(nid)].nasid; 344 return uv_blade_info[uv_node_to_blade_id(nid)].pnode;
275} 345}
276 346
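A kernel-context sketch chaining the conversions above to find the pnode that owns a given cpu; this is equivalent to uv_cpu_to_pnode() but shows the blade-table indirection explicitly:

    /* sketch: cpu -> blade -> pnode, using the inlines above. */
    static int example_cpu_pnode(int cpu)
    {
        int bid = uv_cpu_to_blade_id(cpu);  /* cpu to blade number */

        return uv_blade_to_pnode(bid);      /* blade to its pnode */
    }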
277/* Maximum possible number of blades */ 347/* Maximum possible number of blades */
diff --git a/include/asm-x86/uv/uv_mmrs.h b/include/asm-x86/uv/uv_mmrs.h
index 3b69fe6b6376..151fd7fcb809 100644
--- a/include/asm-x86/uv/uv_mmrs.h
+++ b/include/asm-x86/uv/uv_mmrs.h
@@ -11,17 +11,290 @@
11#ifndef __ASM_X86_UV_MMRS__ 11#ifndef __ASM_X86_UV_MMRS__
12#define __ASM_X86_UV_MMRS__ 12#define __ASM_X86_UV_MMRS__
13 13
14/* 14#define UV_MMR_ENABLE (1UL << 63)
15 * AUTO GENERATED - Do not edit 15
16 */ 16/* ========================================================================= */
17/* UVH_BAU_DATA_CONFIG */
18/* ========================================================================= */
19#define UVH_BAU_DATA_CONFIG 0x61680UL
20#define UVH_BAU_DATA_CONFIG_32 0x0438
21
22#define UVH_BAU_DATA_CONFIG_VECTOR_SHFT 0
23#define UVH_BAU_DATA_CONFIG_VECTOR_MASK 0x00000000000000ffUL
24#define UVH_BAU_DATA_CONFIG_DM_SHFT 8
25#define UVH_BAU_DATA_CONFIG_DM_MASK 0x0000000000000700UL
26#define UVH_BAU_DATA_CONFIG_DESTMODE_SHFT 11
27#define UVH_BAU_DATA_CONFIG_DESTMODE_MASK 0x0000000000000800UL
28#define UVH_BAU_DATA_CONFIG_STATUS_SHFT 12
29#define UVH_BAU_DATA_CONFIG_STATUS_MASK 0x0000000000001000UL
30#define UVH_BAU_DATA_CONFIG_P_SHFT 13
31#define UVH_BAU_DATA_CONFIG_P_MASK 0x0000000000002000UL
32#define UVH_BAU_DATA_CONFIG_T_SHFT 15
33#define UVH_BAU_DATA_CONFIG_T_MASK 0x0000000000008000UL
34#define UVH_BAU_DATA_CONFIG_M_SHFT 16
35#define UVH_BAU_DATA_CONFIG_M_MASK 0x0000000000010000UL
36#define UVH_BAU_DATA_CONFIG_APIC_ID_SHFT 32
37#define UVH_BAU_DATA_CONFIG_APIC_ID_MASK 0xffffffff00000000UL
38
39union uvh_bau_data_config_u {
40 unsigned long v;
41 struct uvh_bau_data_config_s {
42 unsigned long vector_ : 8; /* RW */
43 unsigned long dm : 3; /* RW */
44 unsigned long destmode : 1; /* RW */
45 unsigned long status : 1; /* RO */
46 unsigned long p : 1; /* RO */
47 unsigned long rsvd_14 : 1; /* */
48 unsigned long t : 1; /* RO */
49 unsigned long m : 1; /* RW */
50 unsigned long rsvd_17_31: 15; /* */
51 unsigned long apic_id : 32; /* RW */
52 } s;
53};
54
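A kernel-context sketch of programming this register through the union, then writing it with the MMR64 accessor from uv_hub.h; the vector and apic_id values are hypothetical:

    /* sketch: compose and write UVH_BAU_DATA_CONFIG.
     * vector/apic_id are hypothetical example values. */
    static void example_bau_data_config(int pnode)
    {
        union uvh_bau_data_config_u cfg = { .v = 0UL };

        cfg.s.vector_ = 0xd4;       /* interrupt vector to deliver */
        cfg.s.dm      = 0;          /* fixed delivery mode */
        cfg.s.apic_id = 0x1c0;      /* destination apic id */
        uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG, cfg.v);
    }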
55/* ========================================================================= */
56/* UVH_EVENT_OCCURRED0 */
57/* ========================================================================= */
58#define UVH_EVENT_OCCURRED0 0x70000UL
59#define UVH_EVENT_OCCURRED0_32 0x005e8
60
61#define UVH_EVENT_OCCURRED0_LB_HCERR_SHFT 0
62#define UVH_EVENT_OCCURRED0_LB_HCERR_MASK 0x0000000000000001UL
63#define UVH_EVENT_OCCURRED0_GR0_HCERR_SHFT 1
64#define UVH_EVENT_OCCURRED0_GR0_HCERR_MASK 0x0000000000000002UL
65#define UVH_EVENT_OCCURRED0_GR1_HCERR_SHFT 2
66#define UVH_EVENT_OCCURRED0_GR1_HCERR_MASK 0x0000000000000004UL
67#define UVH_EVENT_OCCURRED0_LH_HCERR_SHFT 3
68#define UVH_EVENT_OCCURRED0_LH_HCERR_MASK 0x0000000000000008UL
69#define UVH_EVENT_OCCURRED0_RH_HCERR_SHFT 4
70#define UVH_EVENT_OCCURRED0_RH_HCERR_MASK 0x0000000000000010UL
71#define UVH_EVENT_OCCURRED0_XN_HCERR_SHFT 5
72#define UVH_EVENT_OCCURRED0_XN_HCERR_MASK 0x0000000000000020UL
73#define UVH_EVENT_OCCURRED0_SI_HCERR_SHFT 6
74#define UVH_EVENT_OCCURRED0_SI_HCERR_MASK 0x0000000000000040UL
75#define UVH_EVENT_OCCURRED0_LB_AOERR0_SHFT 7
76#define UVH_EVENT_OCCURRED0_LB_AOERR0_MASK 0x0000000000000080UL
77#define UVH_EVENT_OCCURRED0_GR0_AOERR0_SHFT 8
78#define UVH_EVENT_OCCURRED0_GR0_AOERR0_MASK 0x0000000000000100UL
79#define UVH_EVENT_OCCURRED0_GR1_AOERR0_SHFT 9
80#define UVH_EVENT_OCCURRED0_GR1_AOERR0_MASK 0x0000000000000200UL
81#define UVH_EVENT_OCCURRED0_LH_AOERR0_SHFT 10
82#define UVH_EVENT_OCCURRED0_LH_AOERR0_MASK 0x0000000000000400UL
83#define UVH_EVENT_OCCURRED0_RH_AOERR0_SHFT 11
84#define UVH_EVENT_OCCURRED0_RH_AOERR0_MASK 0x0000000000000800UL
85#define UVH_EVENT_OCCURRED0_XN_AOERR0_SHFT 12
86#define UVH_EVENT_OCCURRED0_XN_AOERR0_MASK 0x0000000000001000UL
87#define UVH_EVENT_OCCURRED0_SI_AOERR0_SHFT 13
88#define UVH_EVENT_OCCURRED0_SI_AOERR0_MASK 0x0000000000002000UL
89#define UVH_EVENT_OCCURRED0_LB_AOERR1_SHFT 14
90#define UVH_EVENT_OCCURRED0_LB_AOERR1_MASK 0x0000000000004000UL
91#define UVH_EVENT_OCCURRED0_GR0_AOERR1_SHFT 15
92#define UVH_EVENT_OCCURRED0_GR0_AOERR1_MASK 0x0000000000008000UL
93#define UVH_EVENT_OCCURRED0_GR1_AOERR1_SHFT 16
94#define UVH_EVENT_OCCURRED0_GR1_AOERR1_MASK 0x0000000000010000UL
95#define UVH_EVENT_OCCURRED0_LH_AOERR1_SHFT 17
96#define UVH_EVENT_OCCURRED0_LH_AOERR1_MASK 0x0000000000020000UL
97#define UVH_EVENT_OCCURRED0_RH_AOERR1_SHFT 18
98#define UVH_EVENT_OCCURRED0_RH_AOERR1_MASK 0x0000000000040000UL
99#define UVH_EVENT_OCCURRED0_XN_AOERR1_SHFT 19
100#define UVH_EVENT_OCCURRED0_XN_AOERR1_MASK 0x0000000000080000UL
101#define UVH_EVENT_OCCURRED0_SI_AOERR1_SHFT 20
102#define UVH_EVENT_OCCURRED0_SI_AOERR1_MASK 0x0000000000100000UL
103#define UVH_EVENT_OCCURRED0_RH_VPI_INT_SHFT 21
104#define UVH_EVENT_OCCURRED0_RH_VPI_INT_MASK 0x0000000000200000UL
105#define UVH_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_SHFT 22
106#define UVH_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_MASK 0x0000000000400000UL
107#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_0_SHFT 23
108#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_0_MASK 0x0000000000800000UL
109#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_1_SHFT 24
110#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_1_MASK 0x0000000001000000UL
111#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_2_SHFT 25
112#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_2_MASK 0x0000000002000000UL
113#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_3_SHFT 26
114#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_3_MASK 0x0000000004000000UL
115#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_4_SHFT 27
116#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_4_MASK 0x0000000008000000UL
117#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_5_SHFT 28
118#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_5_MASK 0x0000000010000000UL
119#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_6_SHFT 29
120#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_6_MASK 0x0000000020000000UL
121#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_7_SHFT 30
122#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_7_MASK 0x0000000040000000UL
123#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_8_SHFT 31
124#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_8_MASK 0x0000000080000000UL
125#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_9_SHFT 32
126#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_9_MASK 0x0000000100000000UL
127#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_10_SHFT 33
128#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_10_MASK 0x0000000200000000UL
129#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_11_SHFT 34
130#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_11_MASK 0x0000000400000000UL
131#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_12_SHFT 35
132#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_12_MASK 0x0000000800000000UL
133#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_13_SHFT 36
134#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_13_MASK 0x0000001000000000UL
135#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_14_SHFT 37
136#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_14_MASK 0x0000002000000000UL
137#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_15_SHFT 38
138#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_15_MASK 0x0000004000000000UL
139#define UVH_EVENT_OCCURRED0_L1_NMI_INT_SHFT 39
140#define UVH_EVENT_OCCURRED0_L1_NMI_INT_MASK 0x0000008000000000UL
141#define UVH_EVENT_OCCURRED0_STOP_CLOCK_SHFT 40
142#define UVH_EVENT_OCCURRED0_STOP_CLOCK_MASK 0x0000010000000000UL
143#define UVH_EVENT_OCCURRED0_ASIC_TO_L1_SHFT 41
144#define UVH_EVENT_OCCURRED0_ASIC_TO_L1_MASK 0x0000020000000000UL
145#define UVH_EVENT_OCCURRED0_L1_TO_ASIC_SHFT 42
146#define UVH_EVENT_OCCURRED0_L1_TO_ASIC_MASK 0x0000040000000000UL
147#define UVH_EVENT_OCCURRED0_LTC_INT_SHFT 43
148#define UVH_EVENT_OCCURRED0_LTC_INT_MASK 0x0000080000000000UL
149#define UVH_EVENT_OCCURRED0_LA_SEQ_TRIGGER_SHFT 44
150#define UVH_EVENT_OCCURRED0_LA_SEQ_TRIGGER_MASK 0x0000100000000000UL
151#define UVH_EVENT_OCCURRED0_IPI_INT_SHFT 45
152#define UVH_EVENT_OCCURRED0_IPI_INT_MASK 0x0000200000000000UL
153#define UVH_EVENT_OCCURRED0_EXTIO_INT0_SHFT 46
154#define UVH_EVENT_OCCURRED0_EXTIO_INT0_MASK 0x0000400000000000UL
155#define UVH_EVENT_OCCURRED0_EXTIO_INT1_SHFT 47
156#define UVH_EVENT_OCCURRED0_EXTIO_INT1_MASK 0x0000800000000000UL
157#define UVH_EVENT_OCCURRED0_EXTIO_INT2_SHFT 48
158#define UVH_EVENT_OCCURRED0_EXTIO_INT2_MASK 0x0001000000000000UL
159#define UVH_EVENT_OCCURRED0_EXTIO_INT3_SHFT 49
160#define UVH_EVENT_OCCURRED0_EXTIO_INT3_MASK 0x0002000000000000UL
161#define UVH_EVENT_OCCURRED0_PROFILE_INT_SHFT 50
162#define UVH_EVENT_OCCURRED0_PROFILE_INT_MASK 0x0004000000000000UL
163#define UVH_EVENT_OCCURRED0_RTC0_SHFT 51
164#define UVH_EVENT_OCCURRED0_RTC0_MASK 0x0008000000000000UL
165#define UVH_EVENT_OCCURRED0_RTC1_SHFT 52
166#define UVH_EVENT_OCCURRED0_RTC1_MASK 0x0010000000000000UL
167#define UVH_EVENT_OCCURRED0_RTC2_SHFT 53
168#define UVH_EVENT_OCCURRED0_RTC2_MASK 0x0020000000000000UL
169#define UVH_EVENT_OCCURRED0_RTC3_SHFT 54
170#define UVH_EVENT_OCCURRED0_RTC3_MASK 0x0040000000000000UL
171#define UVH_EVENT_OCCURRED0_BAU_DATA_SHFT 55
172#define UVH_EVENT_OCCURRED0_BAU_DATA_MASK 0x0080000000000000UL
173#define UVH_EVENT_OCCURRED0_POWER_MANAGEMENT_REQ_SHFT 56
174#define UVH_EVENT_OCCURRED0_POWER_MANAGEMENT_REQ_MASK 0x0100000000000000UL
175union uvh_event_occurred0_u {
176 unsigned long v;
177 struct uvh_event_occurred0_s {
178 unsigned long lb_hcerr : 1; /* RW, W1C */
179 unsigned long gr0_hcerr : 1; /* RW, W1C */
180 unsigned long gr1_hcerr : 1; /* RW, W1C */
181 unsigned long lh_hcerr : 1; /* RW, W1C */
182 unsigned long rh_hcerr : 1; /* RW, W1C */
183 unsigned long xn_hcerr : 1; /* RW, W1C */
184 unsigned long si_hcerr : 1; /* RW, W1C */
185 unsigned long lb_aoerr0 : 1; /* RW, W1C */
186 unsigned long gr0_aoerr0 : 1; /* RW, W1C */
187 unsigned long gr1_aoerr0 : 1; /* RW, W1C */
188 unsigned long lh_aoerr0 : 1; /* RW, W1C */
189 unsigned long rh_aoerr0 : 1; /* RW, W1C */
190 unsigned long xn_aoerr0 : 1; /* RW, W1C */
191 unsigned long si_aoerr0 : 1; /* RW, W1C */
192 unsigned long lb_aoerr1 : 1; /* RW, W1C */
193 unsigned long gr0_aoerr1 : 1; /* RW, W1C */
194 unsigned long gr1_aoerr1 : 1; /* RW, W1C */
195 unsigned long lh_aoerr1 : 1; /* RW, W1C */
196 unsigned long rh_aoerr1 : 1; /* RW, W1C */
197 unsigned long xn_aoerr1 : 1; /* RW, W1C */
198 unsigned long si_aoerr1 : 1; /* RW, W1C */
199 unsigned long rh_vpi_int : 1; /* RW, W1C */
200 unsigned long system_shutdown_int : 1; /* RW, W1C */
201 unsigned long lb_irq_int_0 : 1; /* RW, W1C */
202 unsigned long lb_irq_int_1 : 1; /* RW, W1C */
203 unsigned long lb_irq_int_2 : 1; /* RW, W1C */
204 unsigned long lb_irq_int_3 : 1; /* RW, W1C */
205 unsigned long lb_irq_int_4 : 1; /* RW, W1C */
206 unsigned long lb_irq_int_5 : 1; /* RW, W1C */
207 unsigned long lb_irq_int_6 : 1; /* RW, W1C */
208 unsigned long lb_irq_int_7 : 1; /* RW, W1C */
209 unsigned long lb_irq_int_8 : 1; /* RW, W1C */
210 unsigned long lb_irq_int_9 : 1; /* RW, W1C */
211 unsigned long lb_irq_int_10 : 1; /* RW, W1C */
212 unsigned long lb_irq_int_11 : 1; /* RW, W1C */
213 unsigned long lb_irq_int_12 : 1; /* RW, W1C */
214 unsigned long lb_irq_int_13 : 1; /* RW, W1C */
215 unsigned long lb_irq_int_14 : 1; /* RW, W1C */
216 unsigned long lb_irq_int_15 : 1; /* RW, W1C */
217 unsigned long l1_nmi_int : 1; /* RW, W1C */
218 unsigned long stop_clock : 1; /* RW, W1C */
219 unsigned long asic_to_l1 : 1; /* RW, W1C */
220 unsigned long l1_to_asic : 1; /* RW, W1C */
221 unsigned long ltc_int : 1; /* RW, W1C */
222 unsigned long la_seq_trigger : 1; /* RW, W1C */
223 unsigned long ipi_int : 1; /* RW, W1C */
224 unsigned long extio_int0 : 1; /* RW, W1C */
225 unsigned long extio_int1 : 1; /* RW, W1C */
226 unsigned long extio_int2 : 1; /* RW, W1C */
227 unsigned long extio_int3 : 1; /* RW, W1C */
228 unsigned long profile_int : 1; /* RW, W1C */
229 unsigned long rtc0 : 1; /* RW, W1C */
230 unsigned long rtc1 : 1; /* RW, W1C */
231 unsigned long rtc2 : 1; /* RW, W1C */
232 unsigned long rtc3 : 1; /* RW, W1C */
233 unsigned long bau_data : 1; /* RW, W1C */
234 unsigned long power_management_req : 1; /* RW, W1C */
235 unsigned long rsvd_57_63 : 7; /* */
236 } s;
237};
238
239/* ========================================================================= */
240/* UVH_EVENT_OCCURRED0_ALIAS */
241/* ========================================================================= */
242#define UVH_EVENT_OCCURRED0_ALIAS 0x0000000000070008UL
243#define UVH_EVENT_OCCURRED0_ALIAS_32 0x005f0
244
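The UVH_EVENT_OCCURRED0 fields above are marked W1C (write 1 to clear). A kernel-context sketch, assuming the usual UV convention that a write to the _ALIAS register clears the corresponding bits:

    /* sketch: clear the ipi_int event bit via the alias register.
     * Assumes the alias follows the write-1-to-clear convention. */
    static void example_clear_ipi_event(void)
    {
        uv_write_local_mmr(UVH_EVENT_OCCURRED0_ALIAS,
                           UVH_EVENT_OCCURRED0_IPI_INT_MASK);
    }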
245/* ========================================================================= */
246/* UVH_INT_CMPB */
247/* ========================================================================= */
248#define UVH_INT_CMPB 0x22080UL
249
250#define UVH_INT_CMPB_REAL_TIME_CMPB_SHFT 0
251#define UVH_INT_CMPB_REAL_TIME_CMPB_MASK 0x00ffffffffffffffUL
252
253union uvh_int_cmpb_u {
254 unsigned long v;
255 struct uvh_int_cmpb_s {
256 unsigned long real_time_cmpb : 56; /* RW */
257 unsigned long rsvd_56_63 : 8; /* */
258 } s;
259};
260
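A kernel-context sketch of arming the 56-bit real-time comparator through the union above; the deadline is whatever RTC tick the caller chooses, and the mask keeps it within the field:

    /* sketch: arm real-time comparator B.
     * 'deadline' is an RTC tick value chosen by the caller. */
    static void example_set_cmpb(unsigned long deadline)
    {
        union uvh_int_cmpb_u cmpb = { .v = 0UL };

        cmpb.s.real_time_cmpb = deadline & UVH_INT_CMPB_REAL_TIME_CMPB_MASK;
        uv_write_local_mmr(UVH_INT_CMPB, cmpb.v);
    }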
261/* ========================================================================= */
262/* UVH_INT_CMPC */
263/* ========================================================================= */
264#define UVH_INT_CMPC 0x22100UL
265
266#define UVH_INT_CMPC_REAL_TIME_CMPC_SHFT 0
267#define UVH_INT_CMPC_REAL_TIME_CMPC_MASK 0x00ffffffffffffffUL
268
269union uvh_int_cmpc_u {
270 unsigned long v;
271 struct uvh_int_cmpc_s {
272 unsigned long real_time_cmpc : 56; /* RW */
273 unsigned long rsvd_56_63 : 8; /* */
274 } s;
275};
276
277/* ========================================================================= */
278/* UVH_INT_CMPD */
279/* ========================================================================= */
280#define UVH_INT_CMPD 0x22180UL
17 281
18 #define UV_MMR_ENABLE (1UL << 63) 282#define UVH_INT_CMPD_REAL_TIME_CMPD_SHFT 0
283#define UVH_INT_CMPD_REAL_TIME_CMPD_MASK 0x00ffffffffffffffUL
284
285union uvh_int_cmpd_u {
286 unsigned long v;
287 struct uvh_int_cmpd_s {
288 unsigned long real_time_cmpd : 56; /* RW */
289 unsigned long rsvd_56_63 : 8; /* */
290 } s;
291};
19 292
20/* ========================================================================= */ 293/* ========================================================================= */
21/* UVH_IPI_INT */ 294/* UVH_IPI_INT */
22/* ========================================================================= */ 295/* ========================================================================= */
23#define UVH_IPI_INT 0x60500UL 296#define UVH_IPI_INT 0x60500UL
24#define UVH_IPI_INT_32 0x0360 297#define UVH_IPI_INT_32 0x0348
25 298
26#define UVH_IPI_INT_VECTOR_SHFT 0 299#define UVH_IPI_INT_VECTOR_SHFT 0
27#define UVH_IPI_INT_VECTOR_MASK 0x00000000000000ffUL 300#define UVH_IPI_INT_VECTOR_MASK 0x00000000000000ffUL
@@ -51,7 +324,7 @@ union uvh_ipi_int_u {
51/* UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST */ 324/* UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST */
52/* ========================================================================= */ 325/* ========================================================================= */
53#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST 0x320050UL 326#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST 0x320050UL
54#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_32 0x009f0 327#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_32 0x009c0
55 328
56#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_SHFT 4 329#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_SHFT 4
57#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_MASK 0x000007fffffffff0UL 330#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_MASK 0x000007fffffffff0UL
@@ -73,7 +346,7 @@ union uvh_lb_bau_intd_payload_queue_first_u {
73/* UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST */ 346/* UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST */
74/* ========================================================================= */ 347/* ========================================================================= */
75#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST 0x320060UL 348#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST 0x320060UL
76#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_32 0x009f8 349#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_32 0x009c8
77 350
78#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_SHFT 4 351#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_SHFT 4
79#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_MASK 0x000007fffffffff0UL 352#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_MASK 0x000007fffffffff0UL
@@ -91,7 +364,7 @@ union uvh_lb_bau_intd_payload_queue_last_u {
91/* UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL */ 364/* UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL */
92/* ========================================================================= */ 365/* ========================================================================= */
93#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL 0x320070UL 366#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL 0x320070UL
94#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_32 0x00a00 367#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_32 0x009d0
95 368
96#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_SHFT 4 369#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_SHFT 4
97#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_MASK 0x000007fffffffff0UL 370#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_MASK 0x000007fffffffff0UL
@@ -109,6 +382,7 @@ union uvh_lb_bau_intd_payload_queue_tail_u {
109/* UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE */ 382/* UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE */
110/* ========================================================================= */ 383/* ========================================================================= */
111#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE 0x320080UL 384#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE 0x320080UL
385#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_32 0x0a68
112 386
113#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_SHFT 0 387#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_SHFT 0
114#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_MASK 0x0000000000000001UL 388#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_MASK 0x0000000000000001UL
@@ -169,12 +443,13 @@ union uvh_lb_bau_intd_software_acknowledge_u {
169/* UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS */ 443/* UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS */
170/* ========================================================================= */ 444/* ========================================================================= */
171#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS 0x0000000000320088UL 445#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS 0x0000000000320088UL
446#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS_32 0x0a70
172 447
173/* ========================================================================= */ 448/* ========================================================================= */
174/* UVH_LB_BAU_SB_ACTIVATION_CONTROL */ 449/* UVH_LB_BAU_SB_ACTIVATION_CONTROL */
175/* ========================================================================= */ 450/* ========================================================================= */
176#define UVH_LB_BAU_SB_ACTIVATION_CONTROL 0x320020UL 451#define UVH_LB_BAU_SB_ACTIVATION_CONTROL 0x320020UL
177#define UVH_LB_BAU_SB_ACTIVATION_CONTROL_32 0x009d8 452#define UVH_LB_BAU_SB_ACTIVATION_CONTROL_32 0x009a8
178 453
179#define UVH_LB_BAU_SB_ACTIVATION_CONTROL_INDEX_SHFT 0 454#define UVH_LB_BAU_SB_ACTIVATION_CONTROL_INDEX_SHFT 0
180#define UVH_LB_BAU_SB_ACTIVATION_CONTROL_INDEX_MASK 0x000000000000003fUL 455#define UVH_LB_BAU_SB_ACTIVATION_CONTROL_INDEX_MASK 0x000000000000003fUL
@@ -197,7 +472,7 @@ union uvh_lb_bau_sb_activation_control_u {
197/* UVH_LB_BAU_SB_ACTIVATION_STATUS_0 */ 472/* UVH_LB_BAU_SB_ACTIVATION_STATUS_0 */
198/* ========================================================================= */ 473/* ========================================================================= */
199#define UVH_LB_BAU_SB_ACTIVATION_STATUS_0 0x320030UL 474#define UVH_LB_BAU_SB_ACTIVATION_STATUS_0 0x320030UL
200#define UVH_LB_BAU_SB_ACTIVATION_STATUS_0_32 0x009e0 475#define UVH_LB_BAU_SB_ACTIVATION_STATUS_0_32 0x009b0
201 476
202#define UVH_LB_BAU_SB_ACTIVATION_STATUS_0_STATUS_SHFT 0 477#define UVH_LB_BAU_SB_ACTIVATION_STATUS_0_STATUS_SHFT 0
203#define UVH_LB_BAU_SB_ACTIVATION_STATUS_0_STATUS_MASK 0xffffffffffffffffUL 478#define UVH_LB_BAU_SB_ACTIVATION_STATUS_0_STATUS_MASK 0xffffffffffffffffUL
@@ -213,7 +488,7 @@ union uvh_lb_bau_sb_activation_status_0_u {
213/* UVH_LB_BAU_SB_ACTIVATION_STATUS_1 */ 488/* UVH_LB_BAU_SB_ACTIVATION_STATUS_1 */
214/* ========================================================================= */ 489/* ========================================================================= */
215#define UVH_LB_BAU_SB_ACTIVATION_STATUS_1 0x320040UL 490#define UVH_LB_BAU_SB_ACTIVATION_STATUS_1 0x320040UL
216#define UVH_LB_BAU_SB_ACTIVATION_STATUS_1_32 0x009e8 491#define UVH_LB_BAU_SB_ACTIVATION_STATUS_1_32 0x009b8
217 492
218#define UVH_LB_BAU_SB_ACTIVATION_STATUS_1_STATUS_SHFT 0 493#define UVH_LB_BAU_SB_ACTIVATION_STATUS_1_STATUS_SHFT 0
219#define UVH_LB_BAU_SB_ACTIVATION_STATUS_1_STATUS_MASK 0xffffffffffffffffUL 494#define UVH_LB_BAU_SB_ACTIVATION_STATUS_1_STATUS_MASK 0xffffffffffffffffUL
@@ -229,7 +504,7 @@ union uvh_lb_bau_sb_activation_status_1_u {
229/* UVH_LB_BAU_SB_DESCRIPTOR_BASE */ 504/* UVH_LB_BAU_SB_DESCRIPTOR_BASE */
230/* ========================================================================= */ 505/* ========================================================================= */
231#define UVH_LB_BAU_SB_DESCRIPTOR_BASE 0x320010UL 506#define UVH_LB_BAU_SB_DESCRIPTOR_BASE 0x320010UL
232#define UVH_LB_BAU_SB_DESCRIPTOR_BASE_32 0x009d0 507#define UVH_LB_BAU_SB_DESCRIPTOR_BASE_32 0x009a0
233 508
234#define UVH_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_SHFT 12 509#define UVH_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_SHFT 12
235#define UVH_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_MASK 0x000007fffffff000UL 510#define UVH_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_MASK 0x000007fffffff000UL
@@ -248,6 +523,334 @@ union uvh_lb_bau_sb_descriptor_base_u {
248}; 523};
249 524
250/* ========================================================================= */ 525/* ========================================================================= */
526/* UVH_LB_MCAST_AOERR0_RPT_ENABLE */
527/* ========================================================================= */
528#define UVH_LB_MCAST_AOERR0_RPT_ENABLE 0x50b20UL
529
530#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_OBESE_MSG_SHFT 0
531#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_OBESE_MSG_MASK 0x0000000000000001UL
532#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_DATA_SB_ERR_SHFT 1
533#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_DATA_SB_ERR_MASK 0x0000000000000002UL
534#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_NACK_BUFF_PARITY_SHFT 2
535#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_NACK_BUFF_PARITY_MASK 0x0000000000000004UL
536#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_TIMEOUT_SHFT 3
537#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_TIMEOUT_MASK 0x0000000000000008UL
538#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_INACTIVE_REPLY_SHFT 4
539#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_INACTIVE_REPLY_MASK 0x0000000000000010UL
540#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_UPGRADE_ERROR_SHFT 5
541#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_UPGRADE_ERROR_MASK 0x0000000000000020UL
542#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_REG_COUNT_UNDERFLOW_SHFT 6
543#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_REG_COUNT_UNDERFLOW_MASK 0x0000000000000040UL
544#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_REP_OBESE_MSG_SHFT 7
545#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_REP_OBESE_MSG_MASK 0x0000000000000080UL
546#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REQ_RUNT_MSG_SHFT 8
547#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REQ_RUNT_MSG_MASK 0x0000000000000100UL
548#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REQ_OBESE_MSG_SHFT 9
549#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REQ_OBESE_MSG_MASK 0x0000000000000200UL
550#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REQ_DATA_SB_ERR_SHFT 10
551#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REQ_DATA_SB_ERR_MASK 0x0000000000000400UL
552#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REP_RUNT_MSG_SHFT 11
553#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REP_RUNT_MSG_MASK 0x0000000000000800UL
554#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REP_OBESE_MSG_SHFT 12
555#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REP_OBESE_MSG_MASK 0x0000000000001000UL
556#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REP_DATA_SB_ERR_SHFT 13
557#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REP_DATA_SB_ERR_MASK 0x0000000000002000UL
558#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REP_COMMAND_ERR_SHFT 14
559#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REP_COMMAND_ERR_MASK 0x0000000000004000UL
560#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_PEND_TIMEOUT_SHFT 15
561#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_PEND_TIMEOUT_MASK 0x0000000000008000UL
562#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REQ_RUNT_MSG_SHFT 16
563#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REQ_RUNT_MSG_MASK 0x0000000000010000UL
564#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REQ_OBESE_MSG_SHFT 17
565#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REQ_OBESE_MSG_MASK 0x0000000000020000UL
566#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REQ_DATA_SB_ERR_SHFT 18
567#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REQ_DATA_SB_ERR_MASK 0x0000000000040000UL
568#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REP_RUNT_MSG_SHFT 19
569#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REP_RUNT_MSG_MASK 0x0000000000080000UL
570#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REP_OBESE_MSG_SHFT 20
571#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REP_OBESE_MSG_MASK 0x0000000000100000UL
572#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REP_DATA_SB_ERR_SHFT 21
573#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REP_DATA_SB_ERR_MASK 0x0000000000200000UL
574#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_AMO_TIMEOUT_SHFT 22
575#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_AMO_TIMEOUT_MASK 0x0000000000400000UL
576#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_PUT_TIMEOUT_SHFT 23
577#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_PUT_TIMEOUT_MASK 0x0000000000800000UL
578#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_SPURIOUS_EVENT_SHFT 24
579#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_SPURIOUS_EVENT_MASK 0x0000000001000000UL
580#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_IOH_DESTINATION_TABLE_PARITY_SHFT 25
581#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_IOH_DESTINATION_TABLE_PARITY_MASK 0x0000000002000000UL
582#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_GET_HAD_ERROR_REPLY_SHFT 26
583#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_GET_HAD_ERROR_REPLY_MASK 0x0000000004000000UL
584#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_GET_TIMEOUT_SHFT 27
585#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_GET_TIMEOUT_MASK 0x0000000008000000UL
586#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_LOCK_MANAGER_HAD_ERROR_REPLY_SHFT 28
587#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_LOCK_MANAGER_HAD_ERROR_REPLY_MASK 0x0000000010000000UL
588#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_PUT_HAD_ERROR_REPLY_SHFT 29
589#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_PUT_HAD_ERROR_REPLY_MASK 0x0000000020000000UL
590#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_PUT_TIMEOUT_SHFT 30
591#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_PUT_TIMEOUT_MASK 0x0000000040000000UL
592#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_SB_ACTIVATION_OVERRUN_SHFT 31
593#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_SB_ACTIVATION_OVERRUN_MASK 0x0000000080000000UL
594#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_COMPLETED_GB_ACTIVATION_HAD_ERROR_REPLY_SHFT 32
595#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_COMPLETED_GB_ACTIVATION_HAD_ERROR_REPLY_MASK 0x0000000100000000UL
596#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_COMPLETED_GB_ACTIVATION_TIMEOUT_SHFT 33
597#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_COMPLETED_GB_ACTIVATION_TIMEOUT_MASK 0x0000000200000000UL
598#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_DESCRIPTOR_BUFFER_0_PARITY_SHFT 34
599#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_DESCRIPTOR_BUFFER_0_PARITY_MASK 0x0000000400000000UL
600#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_DESCRIPTOR_BUFFER_1_PARITY_SHFT 35
601#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_DESCRIPTOR_BUFFER_1_PARITY_MASK 0x0000000800000000UL
602#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_SOCKET_DESTINATION_TABLE_PARITY_SHFT 36
603#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_SOCKET_DESTINATION_TABLE_PARITY_MASK 0x0000001000000000UL
604#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_BAU_REPLY_PAYLOAD_CORRUPTION_SHFT 37
605#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_BAU_REPLY_PAYLOAD_CORRUPTION_MASK 0x0000002000000000UL
606#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_IO_PORT_DESTINATION_TABLE_PARITY_SHFT 38
607#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_IO_PORT_DESTINATION_TABLE_PARITY_MASK 0x0000004000000000UL
608#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INTD_SOFT_ACK_TIMEOUT_SHFT 39
609#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INTD_SOFT_ACK_TIMEOUT_MASK 0x0000008000000000UL
610#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INT_REP_OBESE_MSG_SHFT 40
611#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INT_REP_OBESE_MSG_MASK 0x0000010000000000UL
612#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INT_REP_COMMAND_ERR_SHFT 41
613#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INT_REP_COMMAND_ERR_MASK 0x0000020000000000UL
614#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INT_TIMEOUT_SHFT 42
615#define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INT_TIMEOUT_MASK 0x0000040000000000UL
616
617union uvh_lb_mcast_aoerr0_rpt_enable_u {
618 unsigned long v;
619 struct uvh_lb_mcast_aoerr0_rpt_enable_s {
620 unsigned long mcast_obese_msg : 1; /* RW */
621 unsigned long mcast_data_sb_err : 1; /* RW */
622 unsigned long mcast_nack_buff_parity : 1; /* RW */
623 unsigned long mcast_timeout : 1; /* RW */
624 unsigned long mcast_inactive_reply : 1; /* RW */
625 unsigned long mcast_upgrade_error : 1; /* RW */
626 unsigned long mcast_reg_count_underflow : 1; /* RW */
627 unsigned long mcast_rep_obese_msg : 1; /* RW */
628 unsigned long ucache_req_runt_msg : 1; /* RW */
629 unsigned long ucache_req_obese_msg : 1; /* RW */
630 unsigned long ucache_req_data_sb_err : 1; /* RW */
631 unsigned long ucache_rep_runt_msg : 1; /* RW */
632 unsigned long ucache_rep_obese_msg : 1; /* RW */
633 unsigned long ucache_rep_data_sb_err : 1; /* RW */
634 unsigned long ucache_rep_command_err : 1; /* RW */
635 unsigned long ucache_pend_timeout : 1; /* RW */
636 unsigned long macc_req_runt_msg : 1; /* RW */
637 unsigned long macc_req_obese_msg : 1; /* RW */
638 unsigned long macc_req_data_sb_err : 1; /* RW */
639 unsigned long macc_rep_runt_msg : 1; /* RW */
640 unsigned long macc_rep_obese_msg : 1; /* RW */
641 unsigned long macc_rep_data_sb_err : 1; /* RW */
642 unsigned long macc_amo_timeout : 1; /* RW */
643 unsigned long macc_put_timeout : 1; /* RW */
644 unsigned long macc_spurious_event : 1; /* RW */
645 unsigned long ioh_destination_table_parity : 1; /* RW */
646 unsigned long get_had_error_reply : 1; /* RW */
647 unsigned long get_timeout : 1; /* RW */
648 unsigned long lock_manager_had_error_reply : 1; /* RW */
649 unsigned long put_had_error_reply : 1; /* RW */
650 unsigned long put_timeout : 1; /* RW */
651 unsigned long sb_activation_overrun : 1; /* RW */
652 unsigned long completed_gb_activation_had_error_reply : 1; /* RW */
653 unsigned long completed_gb_activation_timeout : 1; /* RW */
654 unsigned long descriptor_buffer_0_parity : 1; /* RW */
655 unsigned long descriptor_buffer_1_parity : 1; /* RW */
656 unsigned long socket_destination_table_parity : 1; /* RW */
657 unsigned long bau_reply_payload_corruption : 1; /* RW */
658 unsigned long io_port_destination_table_parity : 1; /* RW */
659 unsigned long intd_soft_ack_timeout : 1; /* RW */
660 unsigned long int_rep_obese_msg : 1; /* RW */
661 unsigned long int_rep_command_err : 1; /* RW */
662 unsigned long int_timeout : 1; /* RW */
663 unsigned long rsvd_43_63 : 21; /* */
664 } s;
665};
666
667/* ========================================================================= */
668/* UVH_LOCAL_INT0_CONFIG */
669/* ========================================================================= */
670#define UVH_LOCAL_INT0_CONFIG 0x61000UL
671
672#define UVH_LOCAL_INT0_CONFIG_VECTOR_SHFT 0
673#define UVH_LOCAL_INT0_CONFIG_VECTOR_MASK 0x00000000000000ffUL
674#define UVH_LOCAL_INT0_CONFIG_DM_SHFT 8
675#define UVH_LOCAL_INT0_CONFIG_DM_MASK 0x0000000000000700UL
676#define UVH_LOCAL_INT0_CONFIG_DESTMODE_SHFT 11
677#define UVH_LOCAL_INT0_CONFIG_DESTMODE_MASK 0x0000000000000800UL
678#define UVH_LOCAL_INT0_CONFIG_STATUS_SHFT 12
679#define UVH_LOCAL_INT0_CONFIG_STATUS_MASK 0x0000000000001000UL
680#define UVH_LOCAL_INT0_CONFIG_P_SHFT 13
681#define UVH_LOCAL_INT0_CONFIG_P_MASK 0x0000000000002000UL
682#define UVH_LOCAL_INT0_CONFIG_T_SHFT 15
683#define UVH_LOCAL_INT0_CONFIG_T_MASK 0x0000000000008000UL
684#define UVH_LOCAL_INT0_CONFIG_M_SHFT 16
685#define UVH_LOCAL_INT0_CONFIG_M_MASK 0x0000000000010000UL
686#define UVH_LOCAL_INT0_CONFIG_APIC_ID_SHFT 32
687#define UVH_LOCAL_INT0_CONFIG_APIC_ID_MASK 0xffffffff00000000UL
688
689union uvh_local_int0_config_u {
690 unsigned long v;
691 struct uvh_local_int0_config_s {
692 unsigned long vector_ : 8; /* RW */
693 unsigned long dm : 3; /* RW */
694 unsigned long destmode : 1; /* RW */
695 unsigned long status : 1; /* RO */
696 unsigned long p : 1; /* RO */
697 unsigned long rsvd_14 : 1; /* */
698 unsigned long t : 1; /* RO */
699 unsigned long m : 1; /* RW */
700 unsigned long rsvd_17_31: 15; /* */
701 unsigned long apic_id : 32; /* RW */
702 } s;
703};
704
705/* ========================================================================= */
706/* UVH_LOCAL_INT0_ENABLE */
707/* ========================================================================= */
708#define UVH_LOCAL_INT0_ENABLE 0x65000UL
709
710#define UVH_LOCAL_INT0_ENABLE_LB_HCERR_SHFT 0
711#define UVH_LOCAL_INT0_ENABLE_LB_HCERR_MASK 0x0000000000000001UL
712#define UVH_LOCAL_INT0_ENABLE_GR0_HCERR_SHFT 1
713#define UVH_LOCAL_INT0_ENABLE_GR0_HCERR_MASK 0x0000000000000002UL
714#define UVH_LOCAL_INT0_ENABLE_GR1_HCERR_SHFT 2
715#define UVH_LOCAL_INT0_ENABLE_GR1_HCERR_MASK 0x0000000000000004UL
716#define UVH_LOCAL_INT0_ENABLE_LH_HCERR_SHFT 3
717#define UVH_LOCAL_INT0_ENABLE_LH_HCERR_MASK 0x0000000000000008UL
718#define UVH_LOCAL_INT0_ENABLE_RH_HCERR_SHFT 4
719#define UVH_LOCAL_INT0_ENABLE_RH_HCERR_MASK 0x0000000000000010UL
720#define UVH_LOCAL_INT0_ENABLE_XN_HCERR_SHFT 5
721#define UVH_LOCAL_INT0_ENABLE_XN_HCERR_MASK 0x0000000000000020UL
722#define UVH_LOCAL_INT0_ENABLE_SI_HCERR_SHFT 6
723#define UVH_LOCAL_INT0_ENABLE_SI_HCERR_MASK 0x0000000000000040UL
724#define UVH_LOCAL_INT0_ENABLE_LB_AOERR0_SHFT 7
725#define UVH_LOCAL_INT0_ENABLE_LB_AOERR0_MASK 0x0000000000000080UL
726#define UVH_LOCAL_INT0_ENABLE_GR0_AOERR0_SHFT 8
727#define UVH_LOCAL_INT0_ENABLE_GR0_AOERR0_MASK 0x0000000000000100UL
728#define UVH_LOCAL_INT0_ENABLE_GR1_AOERR0_SHFT 9
729#define UVH_LOCAL_INT0_ENABLE_GR1_AOERR0_MASK 0x0000000000000200UL
730#define UVH_LOCAL_INT0_ENABLE_LH_AOERR0_SHFT 10
731#define UVH_LOCAL_INT0_ENABLE_LH_AOERR0_MASK 0x0000000000000400UL
732#define UVH_LOCAL_INT0_ENABLE_RH_AOERR0_SHFT 11
733#define UVH_LOCAL_INT0_ENABLE_RH_AOERR0_MASK 0x0000000000000800UL
734#define UVH_LOCAL_INT0_ENABLE_XN_AOERR0_SHFT 12
735#define UVH_LOCAL_INT0_ENABLE_XN_AOERR0_MASK 0x0000000000001000UL
736#define UVH_LOCAL_INT0_ENABLE_SI_AOERR0_SHFT 13
737#define UVH_LOCAL_INT0_ENABLE_SI_AOERR0_MASK 0x0000000000002000UL
738#define UVH_LOCAL_INT0_ENABLE_LB_AOERR1_SHFT 14
739#define UVH_LOCAL_INT0_ENABLE_LB_AOERR1_MASK 0x0000000000004000UL
740#define UVH_LOCAL_INT0_ENABLE_GR0_AOERR1_SHFT 15
741#define UVH_LOCAL_INT0_ENABLE_GR0_AOERR1_MASK 0x0000000000008000UL
742#define UVH_LOCAL_INT0_ENABLE_GR1_AOERR1_SHFT 16
743#define UVH_LOCAL_INT0_ENABLE_GR1_AOERR1_MASK 0x0000000000010000UL
744#define UVH_LOCAL_INT0_ENABLE_LH_AOERR1_SHFT 17
745#define UVH_LOCAL_INT0_ENABLE_LH_AOERR1_MASK 0x0000000000020000UL
746#define UVH_LOCAL_INT0_ENABLE_RH_AOERR1_SHFT 18
747#define UVH_LOCAL_INT0_ENABLE_RH_AOERR1_MASK 0x0000000000040000UL
748#define UVH_LOCAL_INT0_ENABLE_XN_AOERR1_SHFT 19
749#define UVH_LOCAL_INT0_ENABLE_XN_AOERR1_MASK 0x0000000000080000UL
750#define UVH_LOCAL_INT0_ENABLE_SI_AOERR1_SHFT 20
751#define UVH_LOCAL_INT0_ENABLE_SI_AOERR1_MASK 0x0000000000100000UL
752#define UVH_LOCAL_INT0_ENABLE_RH_VPI_INT_SHFT 21
753#define UVH_LOCAL_INT0_ENABLE_RH_VPI_INT_MASK 0x0000000000200000UL
754#define UVH_LOCAL_INT0_ENABLE_SYSTEM_SHUTDOWN_INT_SHFT 22
755#define UVH_LOCAL_INT0_ENABLE_SYSTEM_SHUTDOWN_INT_MASK 0x0000000000400000UL
756#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_0_SHFT 23
757#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_0_MASK 0x0000000000800000UL
758#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_1_SHFT 24
759#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_1_MASK 0x0000000001000000UL
760#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_2_SHFT 25
761#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_2_MASK 0x0000000002000000UL
762#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_3_SHFT 26
763#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_3_MASK 0x0000000004000000UL
764#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_4_SHFT 27
765#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_4_MASK 0x0000000008000000UL
766#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_5_SHFT 28
767#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_5_MASK 0x0000000010000000UL
768#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_6_SHFT 29
769#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_6_MASK 0x0000000020000000UL
770#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_7_SHFT 30
771#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_7_MASK 0x0000000040000000UL
772#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_8_SHFT 31
773#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_8_MASK 0x0000000080000000UL
774#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_9_SHFT 32
775#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_9_MASK 0x0000000100000000UL
776#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_10_SHFT 33
777#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_10_MASK 0x0000000200000000UL
778#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_11_SHFT 34
779#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_11_MASK 0x0000000400000000UL
780#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_12_SHFT 35
781#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_12_MASK 0x0000000800000000UL
782#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_13_SHFT 36
783#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_13_MASK 0x0000001000000000UL
784#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_14_SHFT 37
785#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_14_MASK 0x0000002000000000UL
786#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_15_SHFT 38
787#define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_15_MASK 0x0000004000000000UL
788#define UVH_LOCAL_INT0_ENABLE_L1_NMI_INT_SHFT 39
789#define UVH_LOCAL_INT0_ENABLE_L1_NMI_INT_MASK 0x0000008000000000UL
790#define UVH_LOCAL_INT0_ENABLE_STOP_CLOCK_SHFT 40
791#define UVH_LOCAL_INT0_ENABLE_STOP_CLOCK_MASK 0x0000010000000000UL
792#define UVH_LOCAL_INT0_ENABLE_ASIC_TO_L1_SHFT 41
793#define UVH_LOCAL_INT0_ENABLE_ASIC_TO_L1_MASK 0x0000020000000000UL
794#define UVH_LOCAL_INT0_ENABLE_L1_TO_ASIC_SHFT 42
795#define UVH_LOCAL_INT0_ENABLE_L1_TO_ASIC_MASK 0x0000040000000000UL
796#define UVH_LOCAL_INT0_ENABLE_LTC_INT_SHFT 43
797#define UVH_LOCAL_INT0_ENABLE_LTC_INT_MASK 0x0000080000000000UL
798#define UVH_LOCAL_INT0_ENABLE_LA_SEQ_TRIGGER_SHFT 44
799#define UVH_LOCAL_INT0_ENABLE_LA_SEQ_TRIGGER_MASK 0x0000100000000000UL
800
801union uvh_local_int0_enable_u {
802 unsigned long v;
803 struct uvh_local_int0_enable_s {
804 unsigned long lb_hcerr : 1; /* RW */
805 unsigned long gr0_hcerr : 1; /* RW */
806 unsigned long gr1_hcerr : 1; /* RW */
807 unsigned long lh_hcerr : 1; /* RW */
808 unsigned long rh_hcerr : 1; /* RW */
809 unsigned long xn_hcerr : 1; /* RW */
810 unsigned long si_hcerr : 1; /* RW */
811 unsigned long lb_aoerr0 : 1; /* RW */
812 unsigned long gr0_aoerr0 : 1; /* RW */
813 unsigned long gr1_aoerr0 : 1; /* RW */
814 unsigned long lh_aoerr0 : 1; /* RW */
815 unsigned long rh_aoerr0 : 1; /* RW */
816 unsigned long xn_aoerr0 : 1; /* RW */
817 unsigned long si_aoerr0 : 1; /* RW */
818 unsigned long lb_aoerr1 : 1; /* RW */
819 unsigned long gr0_aoerr1 : 1; /* RW */
820 unsigned long gr1_aoerr1 : 1; /* RW */
821 unsigned long lh_aoerr1 : 1; /* RW */
822 unsigned long rh_aoerr1 : 1; /* RW */
823 unsigned long xn_aoerr1 : 1; /* RW */
824 unsigned long si_aoerr1 : 1; /* RW */
825 unsigned long rh_vpi_int : 1; /* RW */
826 unsigned long system_shutdown_int : 1; /* RW */
827 unsigned long lb_irq_int_0 : 1; /* RW */
828 unsigned long lb_irq_int_1 : 1; /* RW */
829 unsigned long lb_irq_int_2 : 1; /* RW */
830 unsigned long lb_irq_int_3 : 1; /* RW */
831 unsigned long lb_irq_int_4 : 1; /* RW */
832 unsigned long lb_irq_int_5 : 1; /* RW */
833 unsigned long lb_irq_int_6 : 1; /* RW */
834 unsigned long lb_irq_int_7 : 1; /* RW */
835 unsigned long lb_irq_int_8 : 1; /* RW */
836 unsigned long lb_irq_int_9 : 1; /* RW */
837 unsigned long lb_irq_int_10 : 1; /* RW */
838 unsigned long lb_irq_int_11 : 1; /* RW */
839 unsigned long lb_irq_int_12 : 1; /* RW */
840 unsigned long lb_irq_int_13 : 1; /* RW */
841 unsigned long lb_irq_int_14 : 1; /* RW */
842 unsigned long lb_irq_int_15 : 1; /* RW */
843 unsigned long l1_nmi_int : 1; /* RW */
844 unsigned long stop_clock : 1; /* RW */
845 unsigned long asic_to_l1 : 1; /* RW */
846 unsigned long l1_to_asic : 1; /* RW */
847 unsigned long ltc_int : 1; /* RW */
848 unsigned long la_seq_trigger : 1; /* RW */
849 unsigned long rsvd_45_63 : 19; /* */
850 } s;
851};
852
853/* ========================================================================= */
251/* UVH_NODE_ID */ 854/* UVH_NODE_ID */
252/* ========================================================================= */ 855/* ========================================================================= */
253#define UVH_NODE_ID 0x0UL 856#define UVH_NODE_ID 0x0UL
@@ -284,14 +887,101 @@ union uvh_node_id_u {
284}; 887};
285 888
286/* ========================================================================= */ 889/* ========================================================================= */
890/* UVH_NODE_PRESENT_TABLE */
891/* ========================================================================= */
892#define UVH_NODE_PRESENT_TABLE 0x1400UL
893#define UVH_NODE_PRESENT_TABLE_DEPTH 16
894
895#define UVH_NODE_PRESENT_TABLE_NODES_SHFT 0
896#define UVH_NODE_PRESENT_TABLE_NODES_MASK 0xffffffffffffffffUL
897
898union uvh_node_present_table_u {
899 unsigned long v;
900 struct uvh_node_present_table_s {
901 unsigned long nodes : 64; /* RW */
902 } s;
903};
904
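The table is 16 consecutive 64-bit words, one bit per node. A kernel-context sketch counting present nodes; it assumes a uv_read_local_mmr() accessor matching the write accessor in uv_hub.h, and uses the kernel's hweight64() popcount:

    /* sketch: count nodes marked present across the 16-word table. */
    static int example_count_present_nodes(void)
    {
        int i, n = 0;

        for (i = 0; i < UVH_NODE_PRESENT_TABLE_DEPTH; i++)
            n += hweight64(uv_read_local_mmr(UVH_NODE_PRESENT_TABLE + i * 8));
        return n;
    }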
905/* ========================================================================= */
906/* UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR */
907/* ========================================================================= */
908#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR 0x16000d0UL
909
910#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT 24
911#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_MASK 0x00003fffff000000UL
912
913union uvh_rh_gam_alias210_redirect_config_0_mmr_u {
914 unsigned long v;
915 struct uvh_rh_gam_alias210_redirect_config_0_mmr_s {
916 unsigned long rsvd_0_23 : 24; /* */
917 unsigned long dest_base : 22; /* RW */
918 unsigned long rsvd_46_63: 18; /* */
919 } s;
920};
921
922/* ========================================================================= */
923/* UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR */
924/* ========================================================================= */
925#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR 0x16000e0UL
926
927#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_SHFT 24
928#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_MASK 0x00003fffff000000UL
929
930union uvh_rh_gam_alias210_redirect_config_1_mmr_u {
931 unsigned long v;
932 struct uvh_rh_gam_alias210_redirect_config_1_mmr_s {
933 unsigned long rsvd_0_23 : 24; /* */
934 unsigned long dest_base : 22; /* RW */
935 unsigned long rsvd_46_63: 18; /* */
936 } s;
937};
938
939/* ========================================================================= */
940/* UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR */
941/* ========================================================================= */
942#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR 0x16000f0UL
943
944#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_SHFT 24
945#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_MASK 0x00003fffff000000UL
946
947union uvh_rh_gam_alias210_redirect_config_2_mmr_u {
948 unsigned long v;
949 struct uvh_rh_gam_alias210_redirect_config_2_mmr_s {
950 unsigned long rsvd_0_23 : 24; /* */
951 unsigned long dest_base : 22; /* RW */
952 unsigned long rsvd_46_63: 18; /* */
953 } s;
954};
955
956/* ========================================================================= */
957/* UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR */
958/* ========================================================================= */
959#define UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR 0x1600020UL
960
961#define UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR_BASE_SHFT 26
962#define UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffffc000000UL
963#define UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63
964#define UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL
965
966union uvh_rh_gam_cfg_overlay_config_mmr_u {
967 unsigned long v;
968 struct uvh_rh_gam_cfg_overlay_config_mmr_s {
969 unsigned long rsvd_0_25: 26; /* */
970 unsigned long base : 20; /* RW */
971 unsigned long rsvd_46_62: 17; /* */
972 unsigned long enable : 1; /* RW */
973 } s;
974};
975
976/* ========================================================================= */
287/* UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR */ 977/* UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR */
288/* ========================================================================= */ 978/* ========================================================================= */
289#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR 0x1600010UL 979#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR 0x1600010UL
290 980
291#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT 28 981#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT 28
292#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffff0000000UL 982#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffff0000000UL
293#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_GR4_SHFT 46 983#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_GR4_SHFT 48
294#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_GR4_MASK 0x0000400000000000UL 984#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_GR4_MASK 0x0001000000000000UL
295#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_SHFT 52 985#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_SHFT 52
296#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_MASK 0x00f0000000000000UL 986#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_MASK 0x00f0000000000000UL
297#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63 987#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63
@@ -302,8 +992,9 @@ union uvh_rh_gam_gru_overlay_config_mmr_u {
302 struct uvh_rh_gam_gru_overlay_config_mmr_s { 992 struct uvh_rh_gam_gru_overlay_config_mmr_s {
303 unsigned long rsvd_0_27: 28; /* */ 993 unsigned long rsvd_0_27: 28; /* */
304 unsigned long base : 18; /* RW */ 994 unsigned long base : 18; /* RW */
995 unsigned long rsvd_46_47: 2; /* */
305 unsigned long gr4 : 1; /* RW */ 996 unsigned long gr4 : 1; /* RW */
306 unsigned long rsvd_47_51: 5; /* */ 997 unsigned long rsvd_49_51: 3; /* */
307 unsigned long n_gru : 4; /* RW */ 998 unsigned long n_gru : 4; /* RW */
308 unsigned long rsvd_56_62: 7; /* */ 999 unsigned long rsvd_56_62: 7; /* */
309 unsigned long enable : 1; /* RW */ 1000 unsigned long enable : 1; /* RW */
@@ -311,6 +1002,32 @@ union uvh_rh_gam_gru_overlay_config_mmr_u {
311}; 1002};
312 1003
313/* ========================================================================= */ 1004/* ========================================================================= */
1005/* UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR */
1006/* ========================================================================= */
1007#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR 0x1600030UL
1008
1009#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT 30
1010#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003fffc0000000UL
1011#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_M_IO_SHFT 46
1012#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_M_IO_MASK 0x000fc00000000000UL
1013#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_N_IO_SHFT 52
1014#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_N_IO_MASK 0x00f0000000000000UL
1015#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63
1016#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL
1017
1018union uvh_rh_gam_mmioh_overlay_config_mmr_u {
1019 unsigned long v;
1020 struct uvh_rh_gam_mmioh_overlay_config_mmr_s {
1021 unsigned long rsvd_0_29: 30; /* */
1022 unsigned long base : 16; /* RW */
1023 unsigned long m_io : 6; /* RW */
1024 unsigned long n_io : 4; /* RW */
1025 unsigned long rsvd_56_62: 7; /* */
1026 unsigned long enable : 1; /* RW */
1027 } s;
1028};
1029
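A sketch of decoding the overlay fields from a raw MMR value (read accessor
assumed, as above):

    union uvh_rh_gam_mmioh_overlay_config_mmr_u m;

    m.v = uv_read_local_mmr(UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR);
    if (m.s.enable)
            printk(KERN_DEBUG "MMIOH overlay: base 0x%lx m_io %lu n_io %lu\n",
                   (unsigned long)m.s.base, (unsigned long)m.s.m_io,
                   (unsigned long)m.s.n_io);
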
1030/* ========================================================================= */
314/* UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR */ 1031/* UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR */
315/* ========================================================================= */ 1032/* ========================================================================= */
316#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR 0x1600028UL 1033#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR 0x1600028UL
@@ -336,7 +1053,7 @@ union uvh_rh_gam_mmr_overlay_config_mmr_u {
336/* ========================================================================= */ 1053/* ========================================================================= */
337/* UVH_RTC */ 1054/* UVH_RTC */
338/* ========================================================================= */ 1055/* ========================================================================= */
339#define UVH_RTC 0x28000UL 1056#define UVH_RTC 0x340000UL
340 1057
341#define UVH_RTC_REAL_TIME_CLOCK_SHFT 0 1058#define UVH_RTC_REAL_TIME_CLOCK_SHFT 0
342#define UVH_RTC_REAL_TIME_CLOCK_MASK 0x00ffffffffffffffUL 1059#define UVH_RTC_REAL_TIME_CLOCK_MASK 0x00ffffffffffffffUL
@@ -350,6 +1067,139 @@ union uvh_rtc_u {
350}; 1067};
351 1068
352/* ========================================================================= */ 1069/* ========================================================================= */
1070/* UVH_RTC1_INT_CONFIG */
1071/* ========================================================================= */
1072#define UVH_RTC1_INT_CONFIG 0x615c0UL
1073
1074#define UVH_RTC1_INT_CONFIG_VECTOR_SHFT 0
1075#define UVH_RTC1_INT_CONFIG_VECTOR_MASK 0x00000000000000ffUL
1076#define UVH_RTC1_INT_CONFIG_DM_SHFT 8
1077#define UVH_RTC1_INT_CONFIG_DM_MASK 0x0000000000000700UL
1078#define UVH_RTC1_INT_CONFIG_DESTMODE_SHFT 11
1079#define UVH_RTC1_INT_CONFIG_DESTMODE_MASK 0x0000000000000800UL
1080#define UVH_RTC1_INT_CONFIG_STATUS_SHFT 12
1081#define UVH_RTC1_INT_CONFIG_STATUS_MASK 0x0000000000001000UL
1082#define UVH_RTC1_INT_CONFIG_P_SHFT 13
1083#define UVH_RTC1_INT_CONFIG_P_MASK 0x0000000000002000UL
1084#define UVH_RTC1_INT_CONFIG_T_SHFT 15
1085#define UVH_RTC1_INT_CONFIG_T_MASK 0x0000000000008000UL
1086#define UVH_RTC1_INT_CONFIG_M_SHFT 16
1087#define UVH_RTC1_INT_CONFIG_M_MASK 0x0000000000010000UL
1088#define UVH_RTC1_INT_CONFIG_APIC_ID_SHFT 32
1089#define UVH_RTC1_INT_CONFIG_APIC_ID_MASK 0xffffffff00000000UL
1090
1091union uvh_rtc1_int_config_u {
1092 unsigned long v;
1093 struct uvh_rtc1_int_config_s {
1094 unsigned long vector_ : 8; /* RW */
1095 unsigned long dm : 3; /* RW */
1096 unsigned long destmode : 1; /* RW */
1097 unsigned long status : 1; /* RO */
1098 unsigned long p : 1; /* RO */
1099 unsigned long rsvd_14 : 1; /* */
1100 unsigned long t : 1; /* RO */
1101 unsigned long m : 1; /* RW */
1102 unsigned long rsvd_17_31: 15; /* */
1103 unsigned long apic_id : 32; /* RW */
1104 } s;
1105};
1106
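A hedged sketch of programming the timer interrupt config; the
uv_write_local_mmr() accessor and the vector/apic_id values are assumptions
supplied by the caller:

    union uvh_rtc1_int_config_u cfg;

    cfg.v = 0;
    cfg.s.vector_ = vector;         /* IDT vector to deliver */
    cfg.s.apic_id = apic_id;        /* destination APIC id */
    cfg.s.m = 0;                    /* leave the interrupt unmasked */
    uv_write_local_mmr(UVH_RTC1_INT_CONFIG, cfg.v);

UVH_RTC2_INT_CONFIG and UVH_RTC3_INT_CONFIG below share the identical layout
and are programmed the same way.
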
1107/* ========================================================================= */
1108/* UVH_RTC2_INT_CONFIG */
1109/* ========================================================================= */
1110#define UVH_RTC2_INT_CONFIG 0x61600UL
1111
1112#define UVH_RTC2_INT_CONFIG_VECTOR_SHFT 0
1113#define UVH_RTC2_INT_CONFIG_VECTOR_MASK 0x00000000000000ffUL
1114#define UVH_RTC2_INT_CONFIG_DM_SHFT 8
1115#define UVH_RTC2_INT_CONFIG_DM_MASK 0x0000000000000700UL
1116#define UVH_RTC2_INT_CONFIG_DESTMODE_SHFT 11
1117#define UVH_RTC2_INT_CONFIG_DESTMODE_MASK 0x0000000000000800UL
1118#define UVH_RTC2_INT_CONFIG_STATUS_SHFT 12
1119#define UVH_RTC2_INT_CONFIG_STATUS_MASK 0x0000000000001000UL
1120#define UVH_RTC2_INT_CONFIG_P_SHFT 13
1121#define UVH_RTC2_INT_CONFIG_P_MASK 0x0000000000002000UL
1122#define UVH_RTC2_INT_CONFIG_T_SHFT 15
1123#define UVH_RTC2_INT_CONFIG_T_MASK 0x0000000000008000UL
1124#define UVH_RTC2_INT_CONFIG_M_SHFT 16
1125#define UVH_RTC2_INT_CONFIG_M_MASK 0x0000000000010000UL
1126#define UVH_RTC2_INT_CONFIG_APIC_ID_SHFT 32
1127#define UVH_RTC2_INT_CONFIG_APIC_ID_MASK 0xffffffff00000000UL
1128
1129union uvh_rtc2_int_config_u {
1130 unsigned long v;
1131 struct uvh_rtc2_int_config_s {
1132 unsigned long vector_ : 8; /* RW */
1133 unsigned long dm : 3; /* RW */
1134 unsigned long destmode : 1; /* RW */
1135 unsigned long status : 1; /* RO */
1136 unsigned long p : 1; /* RO */
1137 unsigned long rsvd_14 : 1; /* */
1138 unsigned long t : 1; /* RO */
1139 unsigned long m : 1; /* RW */
1140 unsigned long rsvd_17_31: 15; /* */
1141 unsigned long apic_id : 32; /* RW */
1142 } s;
1143};
1144
1145/* ========================================================================= */
1146/* UVH_RTC3_INT_CONFIG */
1147/* ========================================================================= */
1148#define UVH_RTC3_INT_CONFIG 0x61640UL
1149
1150#define UVH_RTC3_INT_CONFIG_VECTOR_SHFT 0
1151#define UVH_RTC3_INT_CONFIG_VECTOR_MASK 0x00000000000000ffUL
1152#define UVH_RTC3_INT_CONFIG_DM_SHFT 8
1153#define UVH_RTC3_INT_CONFIG_DM_MASK 0x0000000000000700UL
1154#define UVH_RTC3_INT_CONFIG_DESTMODE_SHFT 11
1155#define UVH_RTC3_INT_CONFIG_DESTMODE_MASK 0x0000000000000800UL
1156#define UVH_RTC3_INT_CONFIG_STATUS_SHFT 12
1157#define UVH_RTC3_INT_CONFIG_STATUS_MASK 0x0000000000001000UL
1158#define UVH_RTC3_INT_CONFIG_P_SHFT 13
1159#define UVH_RTC3_INT_CONFIG_P_MASK 0x0000000000002000UL
1160#define UVH_RTC3_INT_CONFIG_T_SHFT 15
1161#define UVH_RTC3_INT_CONFIG_T_MASK 0x0000000000008000UL
1162#define UVH_RTC3_INT_CONFIG_M_SHFT 16
1163#define UVH_RTC3_INT_CONFIG_M_MASK 0x0000000000010000UL
1164#define UVH_RTC3_INT_CONFIG_APIC_ID_SHFT 32
1165#define UVH_RTC3_INT_CONFIG_APIC_ID_MASK 0xffffffff00000000UL
1166
1167union uvh_rtc3_int_config_u {
1168 unsigned long v;
1169 struct uvh_rtc3_int_config_s {
1170 unsigned long vector_ : 8; /* RW */
1171 unsigned long dm : 3; /* RW */
1172 unsigned long destmode : 1; /* RW */
1173 unsigned long status : 1; /* RO */
1174 unsigned long p : 1; /* RO */
1175 unsigned long rsvd_14 : 1; /* */
1176 unsigned long t : 1; /* RO */
1177 unsigned long m : 1; /* RW */
1178 unsigned long rsvd_17_31: 15; /* */
1179 unsigned long apic_id : 32; /* RW */
1180 } s;
1181};
1182
1183/* ========================================================================= */
1184/* UVH_RTC_INC_RATIO */
1185/* ========================================================================= */
1186#define UVH_RTC_INC_RATIO 0x350000UL
1187
1188#define UVH_RTC_INC_RATIO_FRACTION_SHFT 0
1189#define UVH_RTC_INC_RATIO_FRACTION_MASK 0x00000000000fffffUL
1190#define UVH_RTC_INC_RATIO_RATIO_SHFT 20
1191#define UVH_RTC_INC_RATIO_RATIO_MASK 0x0000000000700000UL
1192
1193union uvh_rtc_inc_ratio_u {
1194 unsigned long v;
1195 struct uvh_rtc_inc_ratio_s {
1196 unsigned long fraction : 20; /* RW */
1197 unsigned long ratio : 3; /* RW */
1198 unsigned long rsvd_23_63: 41; /* */
1199 } s;
1200};
1201
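Field extraction with the SHFT/MASK pairs follows the usual mask-then-shift
idiom (read accessor assumed, as above):

    unsigned long v = uv_read_local_mmr(UVH_RTC_INC_RATIO);
    unsigned long fraction = (v & UVH_RTC_INC_RATIO_FRACTION_MASK)
                                    >> UVH_RTC_INC_RATIO_FRACTION_SHFT;
    unsigned long ratio    = (v & UVH_RTC_INC_RATIO_RATIO_MASK)
                                    >> UVH_RTC_INC_RATIO_RATIO_SHFT;
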
1202/* ========================================================================= */
353/* UVH_SI_ADDR_MAP_CONFIG */ 1203/* UVH_SI_ADDR_MAP_CONFIG */
354/* ========================================================================= */ 1204/* ========================================================================= */
355#define UVH_SI_ADDR_MAP_CONFIG 0xc80000UL 1205#define UVH_SI_ADDR_MAP_CONFIG 0xc80000UL
@@ -369,5 +1219,77 @@ union uvh_si_addr_map_config_u {
369 } s; 1219 } s;
370}; 1220};
371 1221
1222/* ========================================================================= */
1223/* UVH_SI_ALIAS0_OVERLAY_CONFIG */
1224/* ========================================================================= */
1225#define UVH_SI_ALIAS0_OVERLAY_CONFIG 0xc80008UL
1226
1227#define UVH_SI_ALIAS0_OVERLAY_CONFIG_BASE_SHFT 24
1228#define UVH_SI_ALIAS0_OVERLAY_CONFIG_BASE_MASK 0x00000000ff000000UL
1229#define UVH_SI_ALIAS0_OVERLAY_CONFIG_M_ALIAS_SHFT 48
1230#define UVH_SI_ALIAS0_OVERLAY_CONFIG_M_ALIAS_MASK 0x001f000000000000UL
1231#define UVH_SI_ALIAS0_OVERLAY_CONFIG_ENABLE_SHFT 63
1232#define UVH_SI_ALIAS0_OVERLAY_CONFIG_ENABLE_MASK 0x8000000000000000UL
1233
1234union uvh_si_alias0_overlay_config_u {
1235 unsigned long v;
1236 struct uvh_si_alias0_overlay_config_s {
1237 unsigned long rsvd_0_23: 24; /* */
1238 unsigned long base : 8; /* RW */
1239 unsigned long rsvd_32_47: 16; /* */
1240 unsigned long m_alias : 5; /* RW */
1241 unsigned long rsvd_53_62: 10; /* */
1242 unsigned long enable : 1; /* RW */
1243 } s;
1244};
1245
1246/* ========================================================================= */
1247/* UVH_SI_ALIAS1_OVERLAY_CONFIG */
1248/* ========================================================================= */
1249#define UVH_SI_ALIAS1_OVERLAY_CONFIG 0xc80010UL
1250
1251#define UVH_SI_ALIAS1_OVERLAY_CONFIG_BASE_SHFT 24
1252#define UVH_SI_ALIAS1_OVERLAY_CONFIG_BASE_MASK 0x00000000ff000000UL
1253#define UVH_SI_ALIAS1_OVERLAY_CONFIG_M_ALIAS_SHFT 48
1254#define UVH_SI_ALIAS1_OVERLAY_CONFIG_M_ALIAS_MASK 0x001f000000000000UL
1255#define UVH_SI_ALIAS1_OVERLAY_CONFIG_ENABLE_SHFT 63
1256#define UVH_SI_ALIAS1_OVERLAY_CONFIG_ENABLE_MASK 0x8000000000000000UL
1257
1258union uvh_si_alias1_overlay_config_u {
1259 unsigned long v;
1260 struct uvh_si_alias1_overlay_config_s {
1261 unsigned long rsvd_0_23: 24; /* */
1262 unsigned long base : 8; /* RW */
1263 unsigned long rsvd_32_47: 16; /* */
1264 unsigned long m_alias : 5; /* RW */
1265 unsigned long rsvd_53_62: 10; /* */
1266 unsigned long enable : 1; /* RW */
1267 } s;
1268};
1269
1270/* ========================================================================= */
1271/* UVH_SI_ALIAS2_OVERLAY_CONFIG */
1272/* ========================================================================= */
1273#define UVH_SI_ALIAS2_OVERLAY_CONFIG 0xc80018UL
1274
1275#define UVH_SI_ALIAS2_OVERLAY_CONFIG_BASE_SHFT 24
1276#define UVH_SI_ALIAS2_OVERLAY_CONFIG_BASE_MASK 0x00000000ff000000UL
1277#define UVH_SI_ALIAS2_OVERLAY_CONFIG_M_ALIAS_SHFT 48
1278#define UVH_SI_ALIAS2_OVERLAY_CONFIG_M_ALIAS_MASK 0x001f000000000000UL
1279#define UVH_SI_ALIAS2_OVERLAY_CONFIG_ENABLE_SHFT 63
1280#define UVH_SI_ALIAS2_OVERLAY_CONFIG_ENABLE_MASK 0x8000000000000000UL
1281
1282union uvh_si_alias2_overlay_config_u {
1283 unsigned long v;
1284 struct uvh_si_alias2_overlay_config_s {
1285 unsigned long rsvd_0_23: 24; /* */
1286 unsigned long base : 8; /* RW */
1287 unsigned long rsvd_32_47: 16; /* */
1288 unsigned long m_alias : 5; /* RW */
1289 unsigned long rsvd_53_62: 10; /* */
1290 unsigned long enable : 1; /* RW */
1291 } s;
1292};
1293
372 1294
373#endif /* __ASM_X86_UV_MMRS__ */ 1295#endif /* __ASM_X86_UV_MMRS__ */
diff --git a/include/asm-x86/mach-visws/cobalt.h b/include/asm-x86/visws/cobalt.h
index 995258831b7f..995258831b7f 100644
--- a/include/asm-x86/mach-visws/cobalt.h
+++ b/include/asm-x86/visws/cobalt.h
diff --git a/include/asm-x86/mach-visws/lithium.h b/include/asm-x86/visws/lithium.h
index dfcd4f07ab85..dfcd4f07ab85 100644
--- a/include/asm-x86/mach-visws/lithium.h
+++ b/include/asm-x86/visws/lithium.h
diff --git a/include/asm-x86/mach-visws/piix4.h b/include/asm-x86/visws/piix4.h
index 83ea4f46e419..83ea4f46e419 100644
--- a/include/asm-x86/mach-visws/piix4.h
+++ b/include/asm-x86/visws/piix4.h
diff --git a/include/asm-x86/visws/sgivw.h b/include/asm-x86/visws/sgivw.h
new file mode 100644
index 000000000000..5fbf63e1003c
--- /dev/null
+++ b/include/asm-x86/visws/sgivw.h
@@ -0,0 +1,5 @@
1/*
2 * Frame buffer position and size:
3 */
4extern unsigned long sgivwfb_mem_phys;
5extern unsigned long sgivwfb_mem_size;
diff --git a/include/asm-x86/vm86.h b/include/asm-x86/vm86.h
index 074b357146df..5ce351325e01 100644
--- a/include/asm-x86/vm86.h
+++ b/include/asm-x86/vm86.h
@@ -14,12 +14,6 @@
14 14
15#include <asm/processor-flags.h> 15#include <asm/processor-flags.h>
16 16
17#ifdef CONFIG_VM86
18#define X86_VM_MASK X86_EFLAGS_VM
19#else
20#define X86_VM_MASK 0 /* No VM86 support */
21#endif
22
23#define BIOSSEG 0x0f000 17#define BIOSSEG 0x0f000
24 18
25#define CPU_086 0 19#define CPU_086 0
@@ -121,7 +115,6 @@ struct vm86plus_info_struct {
121 unsigned long is_vm86pus:1; /* for vm86 internal use */ 115 unsigned long is_vm86pus:1; /* for vm86 internal use */
122 unsigned char vm86dbg_intxxtab[32]; /* for debugger */ 116 unsigned char vm86dbg_intxxtab[32]; /* for debugger */
123}; 117};
124
125struct vm86plus_struct { 118struct vm86plus_struct {
126 struct vm86_regs regs; 119 struct vm86_regs regs;
127 unsigned long flags; 120 unsigned long flags;
@@ -133,6 +126,9 @@ struct vm86plus_struct {
133}; 126};
134 127
135#ifdef __KERNEL__ 128#ifdef __KERNEL__
129
130#include <asm/ptrace.h>
131
136/* 132/*
137 * This is the (kernel) stack-layout when we have done a "SAVE_ALL" from vm86 133 * This is the (kernel) stack-layout when we have done a "SAVE_ALL" from vm86
138 * mode - the main change is that the old segment descriptors aren't 134 * mode - the main change is that the old segment descriptors aren't
@@ -141,7 +137,6 @@ struct vm86plus_struct {
141 * at the end of the structure. Look at ptrace.h to see the "normal" 137 * at the end of the structure. Look at ptrace.h to see the "normal"
142 * setup. For user space layout see 'struct vm86_regs' above. 138 * setup. For user space layout see 'struct vm86_regs' above.
143 */ 139 */
144#include <asm/ptrace.h>
145 140
146struct kernel_vm86_regs { 141struct kernel_vm86_regs {
147/* 142/*
diff --git a/include/asm-x86/vmi_time.h b/include/asm-x86/vmi_time.h
index 478188130328..c3118c385156 100644
--- a/include/asm-x86/vmi_time.h
+++ b/include/asm-x86/vmi_time.h
@@ -50,7 +50,7 @@ extern void __init vmi_time_init(void);
50extern unsigned long vmi_get_wallclock(void); 50extern unsigned long vmi_get_wallclock(void);
51extern int vmi_set_wallclock(unsigned long now); 51extern int vmi_set_wallclock(unsigned long now);
52extern unsigned long long vmi_sched_clock(void); 52extern unsigned long long vmi_sched_clock(void);
53extern unsigned long vmi_cpu_khz(void); 53extern unsigned long vmi_tsc_khz(void);
54 54
55#ifdef CONFIG_X86_LOCAL_APIC 55#ifdef CONFIG_X86_LOCAL_APIC
56extern void __devinit vmi_time_bsp_init(void); 56extern void __devinit vmi_time_bsp_init(void);
diff --git a/include/asm-x86/xen/hypercall.h b/include/asm-x86/xen/hypercall.h
index c2ccd997ed35..2a4f9b41d684 100644
--- a/include/asm-x86/xen/hypercall.h
+++ b/include/asm-x86/xen/hypercall.h
@@ -176,9 +176,9 @@ HYPERVISOR_fpu_taskswitch(int set)
176} 176}
177 177
178static inline int 178static inline int
179HYPERVISOR_sched_op(int cmd, unsigned long arg) 179HYPERVISOR_sched_op(int cmd, void *arg)
180{ 180{
181 return _hypercall2(int, sched_op, cmd, arg); 181 return _hypercall2(int, sched_op_new, cmd, arg);
182} 182}
183 183
184static inline long 184static inline long
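
Taking void * lets callers hand the newer scheduler interface its structured
arguments directly. A hedged sketch, assuming the SCHEDOP definitions from
xen/interface/sched.h:

    struct sched_shutdown r = { .reason = SHUTDOWN_suspend };

    HYPERVISOR_sched_op(SCHEDOP_shutdown, &r);
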
@@ -315,6 +315,13 @@ HYPERVISOR_nmi_op(unsigned long op, unsigned long arg)
315} 315}
316 316
317static inline void 317static inline void
318MULTI_fpu_taskswitch(struct multicall_entry *mcl, int set)
319{
320 mcl->op = __HYPERVISOR_fpu_taskswitch;
321 mcl->args[0] = set;
322}
323
324static inline void
318MULTI_update_va_mapping(struct multicall_entry *mcl, unsigned long va, 325MULTI_update_va_mapping(struct multicall_entry *mcl, unsigned long va,
319 pte_t new_val, unsigned long flags) 326 pte_t new_val, unsigned long flags)
320{ 327{
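
A sketch of queueing the new multicall entry, assuming the
xen_mc_entry()/xen_mc_issue() batching helpers from the pv-ops multicall
code:

    struct multicall_space mcs = xen_mc_entry(0);

    MULTI_fpu_taskswitch(mcs.mc, 1);        /* queue: set CR0.TS */
    xen_mc_issue(PARAVIRT_LAZY_CPU);        /* flushes unless batching lazily */
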
diff --git a/include/asm-x86/xen/page.h b/include/asm-x86/xen/page.h
index e11f24038b1d..377c04591c15 100644
--- a/include/asm-x86/xen/page.h
+++ b/include/asm-x86/xen/page.h
@@ -26,15 +26,20 @@ typedef struct xpaddr {
26#define FOREIGN_FRAME_BIT (1UL<<31) 26#define FOREIGN_FRAME_BIT (1UL<<31)
27#define FOREIGN_FRAME(m) ((m) | FOREIGN_FRAME_BIT) 27#define FOREIGN_FRAME(m) ((m) | FOREIGN_FRAME_BIT)
28 28
29extern unsigned long *phys_to_machine_mapping; 29/* Maximum amount of memory we can handle in a domain in pages */
30#define MAX_DOMAIN_PAGES \
31 ((unsigned long)((u64)CONFIG_XEN_MAX_DOMAIN_MEMORY * 1024 * 1024 * 1024 / PAGE_SIZE))
32
33
34extern unsigned long get_phys_to_machine(unsigned long pfn);
35extern void set_phys_to_machine(unsigned long pfn, unsigned long mfn);
30 36
31static inline unsigned long pfn_to_mfn(unsigned long pfn) 37static inline unsigned long pfn_to_mfn(unsigned long pfn)
32{ 38{
33 if (xen_feature(XENFEAT_auto_translated_physmap)) 39 if (xen_feature(XENFEAT_auto_translated_physmap))
34 return pfn; 40 return pfn;
35 41
36 return phys_to_machine_mapping[(unsigned int)(pfn)] & 42 return get_phys_to_machine(pfn) & ~FOREIGN_FRAME_BIT;
37 ~FOREIGN_FRAME_BIT;
38} 43}
39 44
40static inline int phys_to_machine_mapping_valid(unsigned long pfn) 45static inline int phys_to_machine_mapping_valid(unsigned long pfn)
@@ -42,7 +47,7 @@ static inline int phys_to_machine_mapping_valid(unsigned long pfn)
42 if (xen_feature(XENFEAT_auto_translated_physmap)) 47 if (xen_feature(XENFEAT_auto_translated_physmap))
43 return 1; 48 return 1;
44 49
45 return (phys_to_machine_mapping[pfn] != INVALID_P2M_ENTRY); 50 return get_phys_to_machine(pfn) != INVALID_P2M_ENTRY;
46} 51}
47 52
48static inline unsigned long mfn_to_pfn(unsigned long mfn) 53static inline unsigned long mfn_to_pfn(unsigned long mfn)
@@ -106,20 +111,12 @@ static inline unsigned long mfn_to_local_pfn(unsigned long mfn)
106 unsigned long pfn = mfn_to_pfn(mfn); 111 unsigned long pfn = mfn_to_pfn(mfn);
107 if ((pfn < max_mapnr) 112 if ((pfn < max_mapnr)
108 && !xen_feature(XENFEAT_auto_translated_physmap) 113 && !xen_feature(XENFEAT_auto_translated_physmap)
109 && (phys_to_machine_mapping[pfn] != mfn)) 114 && (get_phys_to_machine(pfn) != mfn))
110 return max_mapnr; /* force !pfn_valid() */ 115 return max_mapnr; /* force !pfn_valid() */
116 /* XXX fixme; not true with sparsemem */
111 return pfn; 117 return pfn;
112} 118}
113 119
114static inline void set_phys_to_machine(unsigned long pfn, unsigned long mfn)
115{
116 if (xen_feature(XENFEAT_auto_translated_physmap)) {
117 BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY);
118 return;
119 }
120 phys_to_machine_mapping[pfn] = mfn;
121}
122
123/* VIRT <-> MACHINE conversion */ 120/* VIRT <-> MACHINE conversion */
124#define virt_to_machine(v) (phys_to_machine(XPADDR(__pa(v)))) 121#define virt_to_machine(v) (phys_to_machine(XPADDR(__pa(v))))
125#define virt_to_mfn(v) (pfn_to_mfn(PFN_DOWN(__pa(v)))) 122#define virt_to_mfn(v) (pfn_to_mfn(PFN_DOWN(__pa(v))))
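
With CONFIG_XEN_MAX_DOMAIN_MEMORY=8 and 4 KiB pages, for example,
MAX_DOMAIN_PAGES works out to 8 * 2^30 / 4096 = 2097152 pages. A minimal
sketch of the accessor pair that replaces direct indexing of the old
phys_to_machine_mapping[] array:

    /* tear down a translation, then observe it is gone */
    set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
    BUG_ON(phys_to_machine_mapping_valid(pfn));
    mfn = get_phys_to_machine(pfn);     /* now INVALID_P2M_ENTRY */
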
diff --git a/include/asm-x86/xor_32.h b/include/asm-x86/xor_32.h
index 067b5c1835a3..921b45840449 100644
--- a/include/asm-x86/xor_32.h
+++ b/include/asm-x86/xor_32.h
@@ -1,3 +1,6 @@
1#ifndef ASM_X86__XOR_32_H
2#define ASM_X86__XOR_32_H
3
1/* 4/*
2 * Optimized RAID-5 checksumming functions for MMX and SSE. 5 * Optimized RAID-5 checksumming functions for MMX and SSE.
3 * 6 *
@@ -881,3 +884,5 @@ do { \
881 deals with a load to a line that is being prefetched. */ 884 deals with a load to a line that is being prefetched. */
882#define XOR_SELECT_TEMPLATE(FASTEST) \ 885#define XOR_SELECT_TEMPLATE(FASTEST) \
883 (cpu_has_xmm ? &xor_block_pIII_sse : FASTEST) 886 (cpu_has_xmm ? &xor_block_pIII_sse : FASTEST)
887
888#endif /* ASM_X86__XOR_32_H */
diff --git a/include/asm-x86/xor_64.h b/include/asm-x86/xor_64.h
index 24957e39ac8a..2d3a18de295b 100644
--- a/include/asm-x86/xor_64.h
+++ b/include/asm-x86/xor_64.h
@@ -1,3 +1,6 @@
1#ifndef ASM_X86__XOR_64_H
2#define ASM_X86__XOR_64_H
3
1/* 4/*
2 * Optimized RAID-5 checksumming functions for MMX and SSE. 5 * Optimized RAID-5 checksumming functions for MMX and SSE.
3 * 6 *
@@ -354,3 +357,5 @@ do { \
354 We may also be able to load into the L1 only depending on how the cpu 357 We may also be able to load into the L1 only depending on how the cpu
355 deals with a load to a line that is being prefetched. */ 358 deals with a load to a line that is being prefetched. */
356#define XOR_SELECT_TEMPLATE(FASTEST) (&xor_block_sse) 359#define XOR_SELECT_TEMPLATE(FASTEST) (&xor_block_sse)
360
361#endif /* ASM_X86__XOR_64_H */
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 41f7ce7edd7a..0601075d09a1 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -82,6 +82,7 @@ char * __acpi_map_table (unsigned long phys_addr, unsigned long size);
82int early_acpi_boot_init(void); 82int early_acpi_boot_init(void);
83int acpi_boot_init (void); 83int acpi_boot_init (void);
84int acpi_boot_table_init (void); 84int acpi_boot_table_init (void);
85int acpi_mps_check (void);
85int acpi_numa_init (void); 86int acpi_numa_init (void);
86 87
87int acpi_table_init (void); 88int acpi_table_init (void);
@@ -250,6 +251,11 @@ static inline int acpi_boot_table_init(void)
250 return 0; 251 return 0;
251} 252}
252 253
254static inline int acpi_mps_check(void)
255{
256 return 0;
257}
258
253static inline int acpi_check_resource_conflict(struct resource *res) 259static inline int acpi_check_resource_conflict(struct resource *res)
254{ 260{
255 return 0; 261 return 0;
diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index 686895bacd9d..a1d9b79078ea 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -84,6 +84,8 @@ extern int reserve_bootmem(unsigned long addr, unsigned long size, int flags);
84 __alloc_bootmem_low(x, PAGE_SIZE, 0) 84 __alloc_bootmem_low(x, PAGE_SIZE, 0)
85#endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */ 85#endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */
86 86
87extern int reserve_bootmem_generic(unsigned long addr, unsigned long size,
88 int flags);
87extern unsigned long free_all_bootmem(void); 89extern unsigned long free_all_bootmem(void);
88extern unsigned long free_all_bootmem_node(pg_data_t *pgdat); 90extern unsigned long free_all_bootmem_node(pg_data_t *pgdat);
89extern void *__alloc_bootmem_node(pg_data_t *pgdat, 91extern void *__alloc_bootmem_node(pg_data_t *pgdat,
diff --git a/include/linux/console.h b/include/linux/console.h
index a4f27fbdf549..248e6e3b9b73 100644
--- a/include/linux/console.h
+++ b/include/linux/console.h
@@ -108,6 +108,8 @@ struct console {
108 struct console *next; 108 struct console *next;
109}; 109};
110 110
111extern int console_set_on_cmdline;
112
111extern int add_preferred_console(char *name, int idx, char *options); 113extern int add_preferred_console(char *name, int idx, char *options);
112extern int update_console_cmdline(char *name, int idx, char *name_new, int idx_new, char *options); 114extern int update_console_cmdline(char *name, int idx, char *name_new, int idx_new, char *options);
113extern void register_console(struct console *); 115extern void register_console(struct console *);
diff --git a/include/linux/delay.h b/include/linux/delay.h
index 54552d21296e..fd832c6d419e 100644
--- a/include/linux/delay.h
+++ b/include/linux/delay.h
@@ -41,6 +41,7 @@ static inline void ndelay(unsigned long x)
41#define ndelay(x) ndelay(x) 41#define ndelay(x) ndelay(x)
42#endif 42#endif
43 43
44extern unsigned long lpj_fine;
44void calibrate_delay(void); 45void calibrate_delay(void);
45void msleep(unsigned int msecs); 46void msleep(unsigned int msecs);
46unsigned long msleep_interruptible(unsigned int msecs); 47unsigned long msleep_interruptible(unsigned int msecs);
diff --git a/include/linux/efi.h b/include/linux/efi.h
index a5f359a7ad0e..807373d467f7 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -287,7 +287,6 @@ efi_guid_unparse(efi_guid_t *guid, char *out)
287extern void efi_init (void); 287extern void efi_init (void);
288extern void *efi_get_pal_addr (void); 288extern void *efi_get_pal_addr (void);
289extern void efi_map_pal_code (void); 289extern void efi_map_pal_code (void);
290extern void efi_map_memmap(void);
291extern void efi_memmap_walk (efi_freemem_callback_t callback, void *arg); 290extern void efi_memmap_walk (efi_freemem_callback_t callback, void *arg);
292extern void efi_gettimeofday (struct timespec *ts); 291extern void efi_gettimeofday (struct timespec *ts);
293extern void efi_enter_virtual_mode (void); /* switch EFI to virtual mode, if possible */ 292extern void efi_enter_virtual_mode (void); /* switch EFI to virtual mode, if possible */
@@ -295,14 +294,11 @@ extern u64 efi_get_iobase (void);
295extern u32 efi_mem_type (unsigned long phys_addr); 294extern u32 efi_mem_type (unsigned long phys_addr);
296extern u64 efi_mem_attributes (unsigned long phys_addr); 295extern u64 efi_mem_attributes (unsigned long phys_addr);
297extern u64 efi_mem_attribute (unsigned long phys_addr, unsigned long size); 296extern u64 efi_mem_attribute (unsigned long phys_addr, unsigned long size);
298extern int efi_mem_attribute_range (unsigned long phys_addr, unsigned long size,
299 u64 attr);
300extern int __init efi_uart_console_only (void); 297extern int __init efi_uart_console_only (void);
301extern void efi_initialize_iomem_resources(struct resource *code_resource, 298extern void efi_initialize_iomem_resources(struct resource *code_resource,
302 struct resource *data_resource, struct resource *bss_resource); 299 struct resource *data_resource, struct resource *bss_resource);
303extern unsigned long efi_get_time(void); 300extern unsigned long efi_get_time(void);
304extern int efi_set_rtc_mmss(unsigned long nowtime); 301extern int efi_set_rtc_mmss(unsigned long nowtime);
305extern int is_available_memory(efi_memory_desc_t * md);
306extern struct efi_memory_map memmap; 302extern struct efi_memory_map memmap;
307 303
308/** 304/**
diff --git a/include/linux/firmware-map.h b/include/linux/firmware-map.h
new file mode 100644
index 000000000000..acbdbcc16051
--- /dev/null
+++ b/include/linux/firmware-map.h
@@ -0,0 +1,74 @@
1/*
2 * include/linux/firmware-map.h:
3 * Copyright (C) 2008 SUSE LINUX Products GmbH
4 * by Bernhard Walle <bwalle@suse.de>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License v2.0 as published by
8 * the Free Software Foundation
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 */
16#ifndef _LINUX_FIRMWARE_MAP_H
17#define _LINUX_FIRMWARE_MAP_H
18
19#include <linux/list.h>
20#include <linux/kobject.h>
21
22/*
23 * provide a dummy interface if CONFIG_FIRMWARE_MEMMAP is disabled
24 */
25#ifdef CONFIG_FIRMWARE_MEMMAP
26
27/**
28 * Adds a firmware mapping entry. This function uses kmalloc() for memory
29 * allocation. Use firmware_map_add_early() if you want to use the bootmem
30 * allocator.
31 *
32 * This function must be called before late_initcall().
33 *
34 * @start: Start of the memory range.
35 * @end: End of the memory range (inclusive).
36 * @type: Type of the memory range.
37 *
38 * Returns 0 on success, or -ENOMEM if no memory could be allocated.
39 */
40int firmware_map_add(resource_size_t start, resource_size_t end,
41 const char *type);
42
43/**
44 * Adds a firmware mapping entry. This function uses the bootmem allocator
45 * for memory allocation. Use firmware_map_add() if you want to use kmalloc().
46 *
47 * This function must be called before late_initcall().
48 *
49 * @start: Start of the memory range.
50 * @end: End of the memory range (inclusive).
51 * @type: Type of the memory range.
52 *
53 * Returns 0 on success, or -ENOMEM if no memory could be allocated.
54 */
55int firmware_map_add_early(resource_size_t start, resource_size_t end,
56 const char *type);
57
58#else /* CONFIG_FIRMWARE_MEMMAP */
59
60static inline int firmware_map_add(resource_size_t start, resource_size_t end,
61 const char *type)
62{
63 return 0;
64}
65
66static inline int firmware_map_add_early(resource_size_t start,
67 resource_size_t end, const char *type)
68{
69 return 0;
70}
71
72#endif /* CONFIG_FIRMWARE_MEMMAP */
73
74#endif /* _LINUX_FIRMWARE_MAP_H */
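
A hedged usage sketch; the address range and the "System RAM" type string
are illustrative (mirroring e820 naming), not mandated by this interface:

    /* record a RAM range in the firmware memmap during early boot */
    if (firmware_map_add_early(0x100000, 0x7fffffff, "System RAM"))
            printk(KERN_WARNING "firmware map: entry could not be added\n");
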
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index e8ffce898bf9..cf9f40a91c9c 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -1,11 +1,11 @@
1#ifndef _LINUX_KERNEL_STAT_H 1#ifndef _LINUX_KERNEL_STAT_H
2#define _LINUX_KERNEL_STAT_H 2#define _LINUX_KERNEL_STAT_H
3 3
4#include <asm/irq.h>
5#include <linux/smp.h> 4#include <linux/smp.h>
6#include <linux/threads.h> 5#include <linux/threads.h>
7#include <linux/percpu.h> 6#include <linux/percpu.h>
8#include <linux/cpumask.h> 7#include <linux/cpumask.h>
8#include <asm/irq.h>
9#include <asm/cputime.h> 9#include <asm/cputime.h>
10 10
11/* 11/*
diff --git a/include/linux/linkage.h b/include/linux/linkage.h
index 2119610b24f8..9fd1f859021b 100644
--- a/include/linux/linkage.h
+++ b/include/linux/linkage.h
@@ -1,6 +1,7 @@
1#ifndef _LINUX_LINKAGE_H 1#ifndef _LINUX_LINKAGE_H
2#define _LINUX_LINKAGE_H 2#define _LINUX_LINKAGE_H
3 3
4#include <linux/compiler.h>
4#include <asm/linkage.h> 5#include <asm/linkage.h>
5 6
6#ifdef __cplusplus 7#ifdef __cplusplus
@@ -17,6 +18,9 @@
17# define asmregparm 18# define asmregparm
18#endif 19#endif
19 20
21#define __page_aligned_data __section(.data.page_aligned) __aligned(PAGE_SIZE)
22#define __page_aligned_bss __section(.bss.page_aligned) __aligned(PAGE_SIZE)
23
20/* 24/*
21 * This is used by architectures to keep arguments on the stack 25 * This is used by architectures to keep arguments on the stack
22 * untouched by the compiler by keeping them live until the end. 26 * untouched by the compiler by keeping them live until the end.
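
A sketch of declaring a page-aligned object with the new helpers (the
variable is illustrative):

    /* e.g. an early boot stack placed in page-aligned .bss */
    static char boot_stack[PAGE_SIZE] __page_aligned_bss;
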
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 586a943cab01..cf1cd3a2ed78 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -998,8 +998,8 @@ extern void free_area_init_node(int nid, pg_data_t *pgdat,
998extern void free_area_init_nodes(unsigned long *max_zone_pfn); 998extern void free_area_init_nodes(unsigned long *max_zone_pfn);
999extern void add_active_range(unsigned int nid, unsigned long start_pfn, 999extern void add_active_range(unsigned int nid, unsigned long start_pfn,
1000 unsigned long end_pfn); 1000 unsigned long end_pfn);
1001extern void shrink_active_range(unsigned int nid, unsigned long old_end_pfn, 1001extern void remove_active_range(unsigned int nid, unsigned long start_pfn,
1002 unsigned long new_end_pfn); 1002 unsigned long end_pfn);
1003extern void push_node_boundaries(unsigned int nid, unsigned long start_pfn, 1003extern void push_node_boundaries(unsigned int nid, unsigned long start_pfn,
1004 unsigned long end_pfn); 1004 unsigned long end_pfn);
1005extern void remove_all_active_ranges(void); 1005extern void remove_all_active_ranges(void);
@@ -1011,6 +1011,8 @@ extern unsigned long find_min_pfn_with_active_regions(void);
1011extern unsigned long find_max_pfn_with_active_regions(void); 1011extern unsigned long find_max_pfn_with_active_regions(void);
1012extern void free_bootmem_with_active_regions(int nid, 1012extern void free_bootmem_with_active_regions(int nid,
1013 unsigned long max_low_pfn); 1013 unsigned long max_low_pfn);
1014typedef int (*work_fn_t)(unsigned long, unsigned long, void *);
1015extern void work_with_active_regions(int nid, work_fn_t work_fn, void *data);
1014extern void sparse_memory_present_with_active_regions(int nid); 1016extern void sparse_memory_present_with_active_regions(int nid);
1015#ifndef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID 1017#ifndef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID
1016extern int early_pfn_to_nid(unsigned long pfn); 1018extern int early_pfn_to_nid(unsigned long pfn);
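
A sketch of the callback shape for the new work_with_active_regions()
iterator; the body and the stop-on-nonzero convention are assumptions:

    static int dump_range(unsigned long start_pfn, unsigned long end_pfn,
                          void *data)
    {
            printk(KERN_DEBUG "active range: %lx-%lx\n", start_pfn, end_pfn);
            return 0;       /* a nonzero return is assumed to stop iteration */
    }

    /* usage: work_with_active_regions(nid, dump_range, NULL); */
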
@@ -1024,6 +1026,7 @@ extern void mem_init(void);
1024extern void show_mem(void); 1026extern void show_mem(void);
1025extern void si_meminfo(struct sysinfo * val); 1027extern void si_meminfo(struct sysinfo * val);
1026extern void si_meminfo_node(struct sysinfo *val, int nid); 1028extern void si_meminfo_node(struct sysinfo *val, int nid);
1029extern int after_bootmem;
1027 1030
1028#ifdef CONFIG_NUMA 1031#ifdef CONFIG_NUMA
1029extern void setup_per_cpu_pageset(void); 1032extern void setup_per_cpu_pageset(void);
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index f31debfac926..0d2a4e7012aa 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -157,6 +157,7 @@ PAGEFLAG(Active, active) __CLEARPAGEFLAG(Active, active)
157__PAGEFLAG(Slab, slab) 157__PAGEFLAG(Slab, slab)
158PAGEFLAG(Checked, owner_priv_1) /* Used by some filesystems */ 158PAGEFLAG(Checked, owner_priv_1) /* Used by some filesystems */
159PAGEFLAG(Pinned, owner_priv_1) TESTSCFLAG(Pinned, owner_priv_1) /* Xen */ 159PAGEFLAG(Pinned, owner_priv_1) TESTSCFLAG(Pinned, owner_priv_1) /* Xen */
160PAGEFLAG(SavePinned, dirty); /* Xen */
160PAGEFLAG(Reserved, reserved) __CLEARPAGEFLAG(Reserved, reserved) 161PAGEFLAG(Reserved, reserved) __CLEARPAGEFLAG(Reserved, reserved)
161PAGEFLAG(Private, private) __CLEARPAGEFLAG(Private, private) 162PAGEFLAG(Private, private) __CLEARPAGEFLAG(Private, private)
162 __SETPAGEFLAG(Private, private) 163 __SETPAGEFLAG(Private, private)
diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h
index e875905f7b12..e8c06122be36 100644
--- a/include/linux/pageblock-flags.h
+++ b/include/linux/pageblock-flags.h
@@ -25,13 +25,11 @@
25 25
26#include <linux/types.h> 26#include <linux/types.h>
27 27
28/* Macro to aid the definition of ranges of bits */
29#define PB_range(name, required_bits) \
30 name, name ## _end = (name + required_bits) - 1
31
32/* Bit indices that affect a whole block of pages */ 28/* Bit indices that affect a whole block of pages */
33enum pageblock_bits { 29enum pageblock_bits {
34 PB_range(PB_migrate, 3), /* 3 bits required for migrate types */ 30 PB_migrate,
31 PB_migrate_end = PB_migrate + 3 - 1,
32 /* 3 bits required for migrate types */
35 NR_PAGEBLOCK_BITS 33 NR_PAGEBLOCK_BITS
36}; 34};
37 35
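
The removed PB_range() macro expanded to exactly the open-coded pair that
replaces it, so the enum values are unchanged:

    /* PB_range(PB_migrate, 3) expands to: */
    enum { PB_migrate, PB_migrate_end = (PB_migrate + 3) - 1 };
    /* i.e. PB_migrate == 0, PB_migrate_end == 2, NR_PAGEBLOCK_BITS == 3 */
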
diff --git a/include/linux/resume-trace.h b/include/linux/resume-trace.h
index f3f4f28c6960..c9ba2fdf807d 100644
--- a/include/linux/resume-trace.h
+++ b/include/linux/resume-trace.h
@@ -8,7 +8,7 @@ extern int pm_trace_enabled;
8 8
9struct device; 9struct device;
10extern void set_trace_device(struct device *); 10extern void set_trace_device(struct device *);
11extern void generate_resume_trace(void *tracedata, unsigned int user); 11extern void generate_resume_trace(const void *tracedata, unsigned int user);
12 12
13#define TRACE_DEVICE(dev) do { \ 13#define TRACE_DEVICE(dev) do { \
14 if (pm_trace_enabled) \ 14 if (pm_trace_enabled) \
diff --git a/include/xen/events.h b/include/xen/events.h
index acd8e062c85f..67c4436554a9 100644
--- a/include/xen/events.h
+++ b/include/xen/events.h
@@ -32,6 +32,7 @@ void unbind_from_irqhandler(unsigned int irq, void *dev_id);
32 32
33void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector); 33void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector);
34int resend_irq_on_evtchn(unsigned int irq); 34int resend_irq_on_evtchn(unsigned int irq);
35void rebind_evtchn_irq(int evtchn, int irq);
35 36
36static inline void notify_remote_via_evtchn(int port) 37static inline void notify_remote_via_evtchn(int port)
37{ 38{
@@ -40,4 +41,7 @@ static inline void notify_remote_via_evtchn(int port)
40} 41}
41 42
42extern void notify_remote_via_irq(int irq); 43extern void notify_remote_via_irq(int irq);
44
45extern void xen_irq_resume(void);
46
43#endif /* _XEN_EVENTS_H */ 47#endif /* _XEN_EVENTS_H */
diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h
index 466204846121..a40f1cd91be1 100644
--- a/include/xen/grant_table.h
+++ b/include/xen/grant_table.h
@@ -51,6 +51,9 @@ struct gnttab_free_callback {
51 u16 count; 51 u16 count;
52}; 52};
53 53
54int gnttab_suspend(void);
55int gnttab_resume(void);
56
54int gnttab_grant_foreign_access(domid_t domid, unsigned long frame, 57int gnttab_grant_foreign_access(domid_t domid, unsigned long frame,
55 int readonly); 58 int readonly);
56 59
diff --git a/include/xen/hvc-console.h b/include/xen/hvc-console.h
index 21c0ecfd786d..98b79bc404dd 100644
--- a/include/xen/hvc-console.h
+++ b/include/xen/hvc-console.h
@@ -3,4 +3,13 @@
3 3
4extern struct console xenboot_console; 4extern struct console xenboot_console;
5 5
6#ifdef CONFIG_HVC_XEN
7void xen_console_resume(void);
8#else
9static inline void xen_console_resume(void) { }
10#endif
11
12void xen_raw_console_write(const char *str);
13void xen_raw_printk(const char *fmt, ...);
14
6#endif /* XEN_HVC_CONSOLE_H */ 15#endif /* XEN_HVC_CONSOLE_H */
diff --git a/include/xen/interface/elfnote.h b/include/xen/interface/elfnote.h
index a64d3df5bd95..7a8262c375cc 100644
--- a/include/xen/interface/elfnote.h
+++ b/include/xen/interface/elfnote.h
@@ -120,6 +120,26 @@
120 */ 120 */
121#define XEN_ELFNOTE_BSD_SYMTAB 11 121#define XEN_ELFNOTE_BSD_SYMTAB 11
122 122
123/*
124 * The lowest address the hypervisor hole can begin at (numeric).
125 *
126 * This must not be set higher than HYPERVISOR_VIRT_START. Its presence
127 * also indicates to the hypervisor that the kernel can deal with the
128 * hole starting at a higher address.
129 */
130#define XEN_ELFNOTE_HV_START_LOW 12
131
132/*
133 * List of maddr_t-sized mask/value pairs describing how to recognize
134 * (non-present) L1 page table entries carrying valid MFNs (numeric).
135 */
136#define XEN_ELFNOTE_L1_MFN_VALID 13
137
138/*
139 * Whether or not the guest supports cooperative suspend cancellation.
140 */
141#define XEN_ELFNOTE_SUSPEND_CANCEL 14
142
123#endif /* __XEN_PUBLIC_ELFNOTE_H__ */ 143#endif /* __XEN_PUBLIC_ELFNOTE_H__ */
124 144
125/* 145/*
diff --git a/include/xen/interface/features.h b/include/xen/interface/features.h
index d73228d16488..f51b6413b054 100644
--- a/include/xen/interface/features.h
+++ b/include/xen/interface/features.h
@@ -38,6 +38,9 @@
38 */ 38 */
39#define XENFEAT_pae_pgdir_above_4gb 4 39#define XENFEAT_pae_pgdir_above_4gb 4
40 40
41/* x86: Does this Xen host support the MMU_PT_UPDATE_PRESERVE_AD hypercall? */
42#define XENFEAT_mmu_pt_update_preserve_ad 5
43
41#define XENFEAT_NR_SUBMAPS 1 44#define XENFEAT_NR_SUBMAPS 1
42 45
43#endif /* __XEN_PUBLIC_FEATURES_H__ */ 46#endif /* __XEN_PUBLIC_FEATURES_H__ */
diff --git a/include/xen/interface/io/fbif.h b/include/xen/interface/io/fbif.h
index 5a934dd7796d..974a51ed9165 100644
--- a/include/xen/interface/io/fbif.h
+++ b/include/xen/interface/io/fbif.h
@@ -49,11 +49,27 @@ struct xenfb_update {
49 int32_t height; /* rect height */ 49 int32_t height; /* rect height */
50}; 50};
51 51
52/*
53 * Framebuffer resize notification event
54 * Capable backend sets feature-resize in xenstore.
55 */
56#define XENFB_TYPE_RESIZE 3
57
58struct xenfb_resize {
59 uint8_t type; /* XENFB_TYPE_RESIZE */
60 int32_t width; /* width in pixels */
61 int32_t height; /* height in pixels */
62 int32_t stride; /* stride in bytes */
63 int32_t depth; /* depth in bits */
64 int32_t offset; /* start offset within framebuffer */
65};
66
52#define XENFB_OUT_EVENT_SIZE 40 67#define XENFB_OUT_EVENT_SIZE 40
53 68
54union xenfb_out_event { 69union xenfb_out_event {
55 uint8_t type; 70 uint8_t type;
56 struct xenfb_update update; 71 struct xenfb_update update;
72 struct xenfb_resize resize;
57 char pad[XENFB_OUT_EVENT_SIZE]; 73 char pad[XENFB_OUT_EVENT_SIZE];
58}; 74};
59 75
@@ -105,15 +121,18 @@ struct xenfb_page {
105 * Each directory page holds PAGE_SIZE / sizeof(*pd) 121 * Each directory page holds PAGE_SIZE / sizeof(*pd)
106 * framebuffer pages, and can thus map up to PAGE_SIZE * 122 * framebuffer pages, and can thus map up to PAGE_SIZE *
107 * PAGE_SIZE / sizeof(*pd) bytes. With PAGE_SIZE == 4096 and 123 * PAGE_SIZE / sizeof(*pd) bytes. With PAGE_SIZE == 4096 and
108 * sizeof(unsigned long) == 4, that's 4 Megs. Two directory 124 * sizeof(unsigned long) == 4/8, that's 4 Megs 32 bit and 2
109 * pages should be enough for a while. 125 * Megs 64 bit. 256 directories give enough room for a 512
126 * Meg framebuffer with a max resolution of 12,800x10,240.
127 * Should be enough for a while with room leftover for
128 * expansion.
110 */ 129 */
111 unsigned long pd[2]; 130 unsigned long pd[256];
112}; 131};
113 132
114/* 133/*
115 * Wart: xenkbd needs to know resolution. Put it here until a better 134 * Wart: xenkbd needs to know default resolution. Put it here until a
116 * solution is found, but don't leak it to the backend. 135 * better solution is found, but don't leak it to the backend.
117 */ 136 */
118#ifdef __KERNEL__ 137#ifdef __KERNEL__
119#define XENFB_WIDTH 800 138#define XENFB_WIDTH 800
diff --git a/include/xen/interface/io/kbdif.h b/include/xen/interface/io/kbdif.h
index fb97f4284ffd..8066c7849fbe 100644
--- a/include/xen/interface/io/kbdif.h
+++ b/include/xen/interface/io/kbdif.h
@@ -49,6 +49,7 @@ struct xenkbd_motion {
49 uint8_t type; /* XENKBD_TYPE_MOTION */ 49 uint8_t type; /* XENKBD_TYPE_MOTION */
50 int32_t rel_x; /* relative X motion */ 50 int32_t rel_x; /* relative X motion */
51 int32_t rel_y; /* relative Y motion */ 51 int32_t rel_y; /* relative Y motion */
52 int32_t rel_z; /* relative Z motion (wheel) */
52}; 53};
53 54
54struct xenkbd_key { 55struct xenkbd_key {
@@ -61,6 +62,7 @@ struct xenkbd_position {
61 uint8_t type; /* XENKBD_TYPE_POS */ 62 uint8_t type; /* XENKBD_TYPE_POS */
62 int32_t abs_x; /* absolute X position (in FB pixels) */ 63 int32_t abs_x; /* absolute X position (in FB pixels) */
63 int32_t abs_y; /* absolute Y position (in FB pixels) */ 64 int32_t abs_y; /* absolute Y position (in FB pixels) */
65 int32_t rel_z; /* relative Z motion (wheel) */
64}; 66};
65 67
66#define XENKBD_IN_EVENT_SIZE 40 68#define XENKBD_IN_EVENT_SIZE 40
diff --git a/include/xen/interface/memory.h b/include/xen/interface/memory.h
index da768469aa92..af36ead16817 100644
--- a/include/xen/interface/memory.h
+++ b/include/xen/interface/memory.h
@@ -29,7 +29,7 @@ struct xen_memory_reservation {
29 * OUT: GMFN bases of extents that were allocated 29 * OUT: GMFN bases of extents that were allocated
30 * (NB. This command also updates the mach_to_phys translation table) 30 * (NB. This command also updates the mach_to_phys translation table)
31 */ 31 */
32 ulong extent_start; 32 GUEST_HANDLE(ulong) extent_start;
33 33
34 /* Number of extents, and size/alignment of each (2^extent_order pages). */ 34 /* Number of extents, and size/alignment of each (2^extent_order pages). */
35 unsigned long nr_extents; 35 unsigned long nr_extents;
@@ -50,6 +50,7 @@ struct xen_memory_reservation {
50 domid_t domid; 50 domid_t domid;
51 51
52}; 52};
53DEFINE_GUEST_HANDLE_STRUCT(xen_memory_reservation);
53 54
54/* 55/*
55 * Returns the maximum machine frame number of mapped RAM in this system. 56 * Returns the maximum machine frame number of mapped RAM in this system.
@@ -85,7 +86,7 @@ struct xen_machphys_mfn_list {
85 * any large discontiguities in the machine address space, 2MB gaps in 86 * any large discontiguities in the machine address space, 2MB gaps in
86 * the machphys table will be represented by an MFN base of zero. 87 * the machphys table will be represented by an MFN base of zero.
87 */ 88 */
88 ulong extent_start; 89 GUEST_HANDLE(ulong) extent_start;
89 90
90 /* 91 /*
91 * Number of extents written to the above array. This will be smaller 92 * Number of extents written to the above array. This will be smaller
@@ -93,6 +94,7 @@ struct xen_machphys_mfn_list {
93 */ 94 */
94 unsigned int nr_extents; 95 unsigned int nr_extents;
95}; 96};
97DEFINE_GUEST_HANDLE_STRUCT(xen_machphys_mfn_list);
96 98
97/* 99/*
98 * Sets the GPFN at which a particular page appears in the specified guest's 100 * Sets the GPFN at which a particular page appears in the specified guest's
@@ -115,6 +117,7 @@ struct xen_add_to_physmap {
115 /* GPFN where the source mapping page should appear. */ 117 /* GPFN where the source mapping page should appear. */
116 unsigned long gpfn; 118 unsigned long gpfn;
117}; 119};
120DEFINE_GUEST_HANDLE_STRUCT(xen_add_to_physmap);
118 121
119/* 122/*
120 * Translates a list of domain-specific GPFNs into MFNs. Returns a -ve error 123 * Translates a list of domain-specific GPFNs into MFNs. Returns a -ve error
@@ -129,13 +132,14 @@ struct xen_translate_gpfn_list {
129 unsigned long nr_gpfns; 132 unsigned long nr_gpfns;
130 133
131 /* List of GPFNs to translate. */ 134 /* List of GPFNs to translate. */
132 ulong gpfn_list; 135 GUEST_HANDLE(ulong) gpfn_list;
133 136
134 /* 137 /*
135 * Output list to contain MFN translations. May be the same as the input 138 * Output list to contain MFN translations. May be the same as the input
136 * list (in which case each input GPFN is overwritten with the output MFN). 139 * list (in which case each input GPFN is overwritten with the output MFN).
137 */ 140 */
138 ulong mfn_list; 141 GUEST_HANDLE(ulong) mfn_list;
139}; 142};
143DEFINE_GUEST_HANDLE_STRUCT(xen_translate_gpfn_list);
140 144
141#endif /* __XEN_PUBLIC_MEMORY_H__ */ 145#endif /* __XEN_PUBLIC_MEMORY_H__ */
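
With the fields typed as guest handles, callers populate them through the
handle helpers instead of raw ulongs. A hedged sketch, assuming the
set_xen_guest_handle() helper that pairs with GUEST_HANDLE() and a
caller-supplied pfn_array/nr_pages:

    struct xen_memory_reservation reservation = {
            .nr_extents   = nr_pages,
            .extent_order = 0,
            .domid        = DOMID_SELF,
    };
    int rc;

    set_xen_guest_handle(reservation.extent_start, pfn_array);
    rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation);
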
diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h
index 819a0331cda9..2befa3e2f1bc 100644
--- a/include/xen/interface/xen.h
+++ b/include/xen/interface/xen.h
@@ -114,9 +114,14 @@
114 * ptr[:2] -- Machine address within the frame whose mapping to modify. 114 * ptr[:2] -- Machine address within the frame whose mapping to modify.
115 * The frame must belong to the FD, if one is specified. 115 * The frame must belong to the FD, if one is specified.
116 * val -- Value to write into the mapping entry. 116 * val -- Value to write into the mapping entry.
117 *
118 * ptr[1:0] == MMU_PT_UPDATE_PRESERVE_AD:
119 * As MMU_NORMAL_PT_UPDATE above, but A/D bits currently in the PTE are ORed
120 * with those in @val.
117 */ 121 */
118#define MMU_NORMAL_PT_UPDATE 0 /* checked '*ptr = val'. ptr is MA. */ 122#define MMU_NORMAL_PT_UPDATE 0 /* checked '*ptr = val'. ptr is MA. */
119#define MMU_MACHPHYS_UPDATE 1 /* ptr = MA of frame to modify entry for */ 123#define MMU_MACHPHYS_UPDATE 1 /* ptr = MA of frame to modify entry for */
124#define MMU_PT_UPDATE_PRESERVE_AD 2 /* atomically: *ptr = val | (*ptr&(A|D)) */
120 125
121/* 126/*
122 * MMU EXTENDED OPERATIONS 127 * MMU EXTENDED OPERATIONS
diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h
index 10ddfe0142d0..a706d6a78960 100644
--- a/include/xen/xen-ops.h
+++ b/include/xen/xen-ops.h
@@ -5,4 +5,10 @@
5 5
6DECLARE_PER_CPU(struct vcpu_info *, xen_vcpu); 6DECLARE_PER_CPU(struct vcpu_info *, xen_vcpu);
7 7
8void xen_pre_suspend(void);
9void xen_post_suspend(int suspend_cancelled);
10
11void xen_mm_pin_all(void);
12void xen_mm_unpin_all(void);
13
8#endif /* INCLUDE_XEN_OPS_H */ 14#endif /* INCLUDE_XEN_OPS_H */
diff --git a/init/calibrate.c b/init/calibrate.c
index ecb3822d4f70..7963e3fc51d9 100644
--- a/init/calibrate.c
+++ b/init/calibrate.c
@@ -8,7 +8,9 @@
8#include <linux/delay.h> 8#include <linux/delay.h>
9#include <linux/init.h> 9#include <linux/init.h>
10#include <linux/timex.h> 10#include <linux/timex.h>
11#include <linux/smp.h>
11 12
13unsigned long lpj_fine;
12unsigned long preset_lpj; 14unsigned long preset_lpj;
13static int __init lpj_setup(char *str) 15static int __init lpj_setup(char *str)
14{ 16{
@@ -33,9 +35,9 @@ static unsigned long __cpuinit calibrate_delay_direct(void)
33 unsigned long pre_start, start, post_start; 35 unsigned long pre_start, start, post_start;
34 unsigned long pre_end, end, post_end; 36 unsigned long pre_end, end, post_end;
35 unsigned long start_jiffies; 37 unsigned long start_jiffies;
36 unsigned long tsc_rate_min, tsc_rate_max; 38 unsigned long timer_rate_min, timer_rate_max;
37 unsigned long good_tsc_sum = 0; 39 unsigned long good_timer_sum = 0;
38 unsigned long good_tsc_count = 0; 40 unsigned long good_timer_count = 0;
39 int i; 41 int i;
40 42
41 if (read_current_timer(&pre_start) < 0 ) 43 if (read_current_timer(&pre_start) < 0 )
@@ -79,22 +81,24 @@ static unsigned long __cpuinit calibrate_delay_direct(void)
79 } 81 }
80 read_current_timer(&post_end); 82 read_current_timer(&post_end);
81 83
82 tsc_rate_max = (post_end - pre_start) / DELAY_CALIBRATION_TICKS; 84 timer_rate_max = (post_end - pre_start) /
83 tsc_rate_min = (pre_end - post_start) / DELAY_CALIBRATION_TICKS; 85 DELAY_CALIBRATION_TICKS;
86 timer_rate_min = (pre_end - post_start) /
87 DELAY_CALIBRATION_TICKS;
84 88
85 /* 89 /*
86 * If the upper limit and lower limit of the tsc_rate is 90 * If the upper limit and lower limit of the timer_rate is
87 * >= 12.5% apart, redo calibration. 91 * >= 12.5% apart, redo calibration.
88 */ 92 */
89 if (pre_start != 0 && pre_end != 0 && 93 if (pre_start != 0 && pre_end != 0 &&
90 (tsc_rate_max - tsc_rate_min) < (tsc_rate_max >> 3)) { 94 (timer_rate_max - timer_rate_min) < (timer_rate_max >> 3)) {
91 good_tsc_count++; 95 good_timer_count++;
92 good_tsc_sum += tsc_rate_max; 96 good_timer_sum += timer_rate_max;
93 } 97 }
94 } 98 }
95 99
96 if (good_tsc_count) 100 if (good_timer_count)
97 return (good_tsc_sum/good_tsc_count); 101 return (good_timer_sum/good_timer_count);
98 102
99 printk(KERN_WARNING "calibrate_delay_direct() failed to get a good " 103 printk(KERN_WARNING "calibrate_delay_direct() failed to get a good "
100 "estimate for loops_per_jiffy.\nProbably due to long platform interrupts. Consider using \"lpj=\" boot option.\n"); 104 "estimate for loops_per_jiffy.\nProbably due to long platform interrupts. Consider using \"lpj=\" boot option.\n");
@@ -108,6 +112,10 @@ static unsigned long __cpuinit calibrate_delay_direct(void) {return 0;}
108 * This is the number of bits of precision for the loops_per_jiffy. Each 112 * This is the number of bits of precision for the loops_per_jiffy. Each
109 * bit takes on average 1.5/HZ seconds. This (like the original) is a little 113 * bit takes on average 1.5/HZ seconds. This (like the original) is a little
110 * better than 1% 114 * better than 1%
115 * For the boot CPU we can skip the delay calibration and assign it a value
116 * calculated based on the timer frequency.
117 * For the rest of the CPUs we cannot assume that the timer frequency is the
118 * same as the CPU frequency, hence we do the calibration for those.
111 */ 119 */
112#define LPS_PREC 8 120#define LPS_PREC 8
113 121
@@ -118,20 +126,20 @@ void __cpuinit calibrate_delay(void)
 
 	if (preset_lpj) {
 		loops_per_jiffy = preset_lpj;
-		printk("Calibrating delay loop (skipped)... "
-			"%lu.%02lu BogoMIPS preset\n",
-			loops_per_jiffy/(500000/HZ),
-			(loops_per_jiffy/(5000/HZ)) % 100);
+		printk(KERN_INFO
+			"Calibrating delay loop (skipped) preset value.. ");
+	} else if ((smp_processor_id() == 0) && lpj_fine) {
+		loops_per_jiffy = lpj_fine;
+		printk(KERN_INFO
+			"Calibrating delay loop (skipped), "
+			"value calculated using timer frequency.. ");
 	} else if ((loops_per_jiffy = calibrate_delay_direct()) != 0) {
-		printk("Calibrating delay using timer specific routine.. ");
-		printk("%lu.%02lu BogoMIPS (lpj=%lu)\n",
-			loops_per_jiffy/(500000/HZ),
-			(loops_per_jiffy/(5000/HZ)) % 100,
-			loops_per_jiffy);
+		printk(KERN_INFO
+			"Calibrating delay using timer specific routine.. ");
 	} else {
 		loops_per_jiffy = (1<<12);
 
-		printk(KERN_DEBUG "Calibrating delay loop... ");
+		printk(KERN_INFO "Calibrating delay loop... ");
 		while ((loops_per_jiffy <<= 1) != 0) {
 			/* wait for "start of" clock tick */
 			ticks = jiffies;
@@ -161,12 +169,8 @@ void __cpuinit calibrate_delay(void)
 			if (jiffies != ticks)	/* longer than 1 tick */
 				loops_per_jiffy &= ~loopbit;
 		}
-
-		/* Round the value and print it */
-		printk("%lu.%02lu BogoMIPS (lpj=%lu)\n",
-			loops_per_jiffy/(500000/HZ),
-			(loops_per_jiffy/(5000/HZ)) % 100,
-			loops_per_jiffy);
 	}
-
+	printk(KERN_INFO "%lu.%02lu BogoMIPS (lpj=%lu)\n",
+			loops_per_jiffy/(500000/HZ),
+			(loops_per_jiffy/(5000/HZ)) % 100, loops_per_jiffy);
 }
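
The init/calibrate.c change adds a second fast path: the boot CPU skips the measurement loop entirely when the architecture has already seeded lpj_fine from a calibrated timer. As a sanity check of the reporting arithmetic, with HZ=250 and loops_per_jiffy=4997120 the final printk computes 4997120/(500000/250) = 2498 and (4997120/(5000/250)) % 100 = 56, i.e. "2498.56 BogoMIPS". A minimal sketch of how an architecture might seed the value follows; timer_khz and seed_lpj_from_timer() are hypothetical names for illustration, not symbols introduced by this patch:

	/*
	 * Hypothetical sketch: calibrate_delay_direct() effectively counts
	 * timer ticks per jiffy, so an arch that already knows its timer
	 * rate can precompute the same answer and store it in lpj_fine
	 * before calibrate_delay() runs on the boot CPU.
	 */
	extern unsigned long lpj_fine;	/* consumed by calibrate_delay() */

	static void __init seed_lpj_from_timer(unsigned long timer_khz)
	{
		u64 lpj = (u64)timer_khz * 1000;	/* ticks per second */

		do_div(lpj, HZ);			/* ticks per jiffy */
		lpj_fine = lpj;
	}
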
diff --git a/kernel/printk.c b/kernel/printk.c
index e2129e83fd75..625d240d7ada 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -75,6 +75,8 @@ EXPORT_SYMBOL(oops_in_progress);
 static DECLARE_MUTEX(console_sem);
 static DECLARE_MUTEX(secondary_console_sem);
 struct console *console_drivers;
+EXPORT_SYMBOL_GPL(console_drivers);
+
 /*
  * This is used for debugging the mess that is the VT code by
  * keeping track if we have the console semaphore held. It's
@@ -121,6 +123,8 @@ struct console_cmdline
 static struct console_cmdline console_cmdline[MAX_CMDLINECONSOLES];
 static int selected_console = -1;
 static int preferred_console = -1;
+int console_set_on_cmdline;
+EXPORT_SYMBOL(console_set_on_cmdline);
 
 /* Flag: console code may call schedule() */
 static int console_may_schedule;
@@ -890,6 +894,7 @@ static int __init console_setup(char *str)
 	*s = 0;
 
 	__add_preferred_console(buf, idx, options, brl_options);
+	console_set_on_cmdline = 1;
 	return 1;
 }
 __setup("console=", console_setup);
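
kernel/printk.c now records whether the user explicitly picked a console with console= on the command line, and exports that flag alongside console_drivers. A minimal consumer sketch, assuming the existing add_preferred_console() API; maybe_add_default_console() and the ttyS fallback are hypothetical, for illustration only:

	#include <linux/console.h>
	#include <linux/init.h>

	static int __init maybe_add_default_console(void)
	{
		/*
		 * Respect an explicit console= choice from the user; only
		 * install a default serial console when none was given.
		 */
		if (!console_set_on_cmdline)
			add_preferred_console("ttyS", 0, "115200n8");
		return 0;
	}
	late_initcall(maybe_add_default_console);
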
diff --git a/kernel/sched.c b/kernel/sched.c
index 4e2f60335656..8402944f715b 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -6539,9 +6539,9 @@ static int find_next_best_node(int node, nodemask_t *used_nodes)
 
 	min_val = INT_MAX;
 
-	for (i = 0; i < MAX_NUMNODES; i++) {
+	for (i = 0; i < nr_node_ids; i++) {
 		/* Start at @node */
-		n = (node + i) % MAX_NUMNODES;
+		n = (node + i) % nr_node_ids;
 
 		if (!nr_cpus_node(n))
 			continue;
@@ -6735,7 +6735,7 @@ static void free_sched_groups(const cpumask_t *cpu_map, cpumask_t *nodemask)
 		if (!sched_group_nodes)
 			continue;
 
-		for (i = 0; i < MAX_NUMNODES; i++) {
+		for (i = 0; i < nr_node_ids; i++) {
 			struct sched_group *oldsg, *sg = sched_group_nodes[i];
 
 			*nodemask = node_to_cpumask(i);
@@ -6928,7 +6928,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 	/*
 	 * Allocate the per-node list of sched groups
 	 */
-	sched_group_nodes = kcalloc(MAX_NUMNODES, sizeof(struct sched_group *),
+	sched_group_nodes = kcalloc(nr_node_ids, sizeof(struct sched_group *),
 				    GFP_KERNEL);
 	if (!sched_group_nodes) {
 		printk(KERN_WARNING "Can not alloc sched group node list\n");
@@ -7067,7 +7067,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 #endif
 
 	/* Set up physical groups */
-	for (i = 0; i < MAX_NUMNODES; i++) {
+	for (i = 0; i < nr_node_ids; i++) {
 		SCHED_CPUMASK_VAR(nodemask, allmasks);
 		SCHED_CPUMASK_VAR(send_covered, allmasks);
 
@@ -7091,7 +7091,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 					send_covered, tmpmask);
 	}
 
-	for (i = 0; i < MAX_NUMNODES; i++) {
+	for (i = 0; i < nr_node_ids; i++) {
 		/* Set up node groups */
 		struct sched_group *sg, *prev;
 		SCHED_CPUMASK_VAR(nodemask, allmasks);
@@ -7130,9 +7130,9 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 		cpus_or(*covered, *covered, *nodemask);
 		prev = sg;
 
-		for (j = 0; j < MAX_NUMNODES; j++) {
+		for (j = 0; j < nr_node_ids; j++) {
 			SCHED_CPUMASK_VAR(notcovered, allmasks);
-			int n = (i + j) % MAX_NUMNODES;
+			int n = (i + j) % nr_node_ids;
 			node_to_cpumask_ptr(pnodemask, n);
 
 			cpus_complement(*notcovered, *covered);
@@ -7185,7 +7185,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 	}
 
 #ifdef CONFIG_NUMA
-	for (i = 0; i < MAX_NUMNODES; i++)
+	for (i = 0; i < nr_node_ids; i++)
 		init_numa_sched_groups_power(sched_group_nodes[i]);
 
 	if (sd_allnodes) {
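
The kernel/sched.c hunks are a mechanical substitution with a real payoff: nr_node_ids is the runtime count of possible node IDs (highest possible node + 1), while MAX_NUMNODES is the compile-time ceiling (1 << CONFIG_NODES_SHIFT). On a kernel built with CONFIG_NODES_SHIFT=9 booted on a 4-node machine, the kcalloc() above shrinks from 512 pointer slots to 4, and each domain-build loop runs 4 iterations instead of 512. A sketch of the pattern; node_tables and alloc_node_tables() are hypothetical names:

	#include <linux/nodemask.h>
	#include <linux/slab.h>

	static struct sched_group **node_tables;

	static int __init alloc_node_tables(void)
	{
		/*
		 * Size runtime per-node tables by nr_node_ids rather than
		 * the compile-time MAX_NUMNODES, so node slots that cannot
		 * exist on this system cost nothing.
		 */
		node_tables = kcalloc(nr_node_ids, sizeof(*node_tables),
				      GFP_KERNEL);
		return node_tables ? 0 : -ENOMEM;
	}
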
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index 57a1f02e5ec0..67f80c261709 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -30,6 +30,7 @@
 struct tick_device tick_broadcast_device;
 static cpumask_t tick_broadcast_mask;
 static DEFINE_SPINLOCK(tick_broadcast_lock);
+static int tick_broadcast_force;
 
 #ifdef CONFIG_TICK_ONESHOT
 static void tick_broadcast_clear_oneshot(int cpu);
@@ -232,10 +233,11 @@ static void tick_do_broadcast_on_off(void *why)
 					     CLOCK_EVT_MODE_SHUTDOWN);
 		}
 		if (*reason == CLOCK_EVT_NOTIFY_BROADCAST_FORCE)
-			dev->features |= CLOCK_EVT_FEAT_DUMMY;
+			tick_broadcast_force = 1;
 		break;
 	case CLOCK_EVT_NOTIFY_BROADCAST_OFF:
-		if (cpu_isset(cpu, tick_broadcast_mask)) {
+		if (!tick_broadcast_force &&
+		    cpu_isset(cpu, tick_broadcast_mask)) {
 			cpu_clear(cpu, tick_broadcast_mask);
 			if (td->mode == TICKDEV_MODE_PERIODIC)
 				tick_setup_periodic(dev, 0);
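
The tick-broadcast change replaces an irreversible hack with explicit state: tagging the per-CPU event device CLOCK_EVT_FEAT_DUMMY could never be undone and misrepresented working hardware, whereas the tick_broadcast_force flag simply makes a later BROADCAST_OFF notification a no-op while a force request is outstanding. Requests reach this path through clockevents_notify(); a minimal caller sketch (keep_cpu_in_broadcast() is a hypothetical name; the ACPI idle code issues similar notifications for C-states that stop the local APIC timer):

	#include <linux/clockchips.h>

	static void keep_cpu_in_broadcast(int cpu)
	{
		/*
		 * Keep @cpu in the broadcast mask even if a later
		 * CLOCK_EVT_NOTIFY_BROADCAST_OFF request arrives.
		 */
		clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_FORCE, &cpu);
	}
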
diff --git a/mm/mprotect.c b/mm/mprotect.c
index a5bf31c27375..acfe7c8d72fc 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -47,19 +47,17 @@ static void change_pte_range(struct mm_struct *mm, pmd_t *pmd,
 		if (pte_present(oldpte)) {
 			pte_t ptent;
 
-			/* Avoid an SMP race with hardware updated dirty/clean
-			 * bits by wiping the pte and then setting the new pte
-			 * into place.
-			 */
-			ptent = ptep_get_and_clear(mm, addr, pte);
+			ptent = ptep_modify_prot_start(mm, addr, pte);
 			ptent = pte_modify(ptent, newprot);
+
 			/*
 			 * Avoid taking write faults for pages we know to be
 			 * dirty.
 			 */
 			if (dirty_accountable && pte_dirty(ptent))
 				ptent = pte_mkwrite(ptent);
-			set_pte_at(mm, addr, pte, ptent);
+
+			ptep_modify_prot_commit(mm, addr, pte, ptent);
 #ifdef CONFIG_MIGRATION
 		} else if (!pte_file(oldpte)) {
 			swp_entry_t entry = pte_to_swp_entry(oldpte);
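
The mm/mprotect.c hunk converts the open-coded pte update into a ptep_modify_prot_start()/ptep_modify_prot_commit() transaction so paravirtualized guests (e.g. Xen) can batch or shortcut the update. On bare hardware the pair should be equivalent to the code it replaces; a sketch of that expected generic fallback, assuming no paravirt hooks intervene (the generic_* names are illustrative, not the header's actual symbols):

	/*
	 * Start atomically snapshots and clears the live pte, closing the
	 * SMP race with hardware dirty/accessed bit updates; commit
	 * installs the modified value.
	 */
	static inline pte_t generic_modify_prot_start(struct mm_struct *mm,
				unsigned long addr, pte_t *ptep)
	{
		return ptep_get_and_clear(mm, addr, ptep);
	}

	static inline void generic_modify_prot_commit(struct mm_struct *mm,
				unsigned long addr, pte_t *ptep, pte_t pte)
	{
		set_pte_at(mm, addr, ptep, pte);
	}
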
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index f32fae3121f0..f024b9b3a2a6 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2929,6 +2929,18 @@ void __init free_bootmem_with_active_regions(int nid,
 	}
 }
 
+void __init work_with_active_regions(int nid, work_fn_t work_fn, void *data)
+{
+	int i;
+	int ret;
+
+	for_each_active_range_index_in_nid(i, nid) {
+		ret = work_fn(early_node_map[i].start_pfn,
+			      early_node_map[i].end_pfn, data);
+		if (ret)
+			break;
+	}
+}
 /**
  * sparse_memory_present_with_active_regions - Call memory_present for each active range
  * @nid: The node to call memory_present for. If MAX_NUMNODES, all nodes will be used.
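
work_with_active_regions() walks each registered early_node_map[] span on @nid and hands it to the callback; a non-zero return ends the walk early. A minimal caller sketch, assuming work_fn_t is typedef'd in the companion header change as int (*)(unsigned long start_pfn, unsigned long end_pfn, void *data); count_spanned_pages() and node_active_pages() are hypothetical names:

	static int __init count_spanned_pages(unsigned long start_pfn,
					      unsigned long end_pfn, void *data)
	{
		unsigned long *total = data;

		*total += end_pfn - start_pfn;	/* pages in this range */
		return 0;			/* 0 == keep walking */
	}

	static unsigned long __init node_active_pages(int nid)
	{
		unsigned long total = 0;

		work_with_active_regions(nid, count_spanned_pages, &total);
		return total;
	}
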
@@ -3461,6 +3473,11 @@ void __paginginit free_area_init_node(int nid, struct pglist_data *pgdat,
 	calculate_node_totalpages(pgdat, zones_size, zholes_size);
 
 	alloc_node_mem_map(pgdat);
+#ifdef CONFIG_FLAT_NODE_MEM_MAP
+	printk(KERN_DEBUG "free_area_init_node: node %d, pgdat %08lx, node_mem_map %08lx\n",
+		nid, (unsigned long)pgdat,
+		(unsigned long)pgdat->node_mem_map);
+#endif
 
 	free_area_init_core(pgdat, zones_size, zholes_size);
 }
@@ -3503,7 +3520,7 @@ void __init add_active_range(unsigned int nid, unsigned long start_pfn,
 {
 	int i;
 
-	printk(KERN_DEBUG "Entering add_active_range(%d, %lu, %lu) "
+	printk(KERN_DEBUG "Entering add_active_range(%d, %#lx, %#lx) "
 			  "%d entries of %d used\n",
 	       nid, start_pfn, end_pfn,
 	       nr_nodemap_entries, MAX_ACTIVE_REGIONS);
@@ -3547,27 +3564,68 @@ void __init add_active_range(unsigned int nid, unsigned long start_pfn,
 }
 
 /**
- * shrink_active_range - Shrink an existing registered range of PFNs
+ * remove_active_range - Shrink an existing registered range of PFNs
  * @nid: The node id the range is on that should be shrunk
- * @old_end_pfn: The old end PFN of the range
- * @new_end_pfn: The new PFN of the range
+ * @start_pfn: The new PFN of the range
+ * @end_pfn: The new PFN of the range
  *
  * i386 with NUMA use alloc_remap() to store a node_mem_map on a local node.
- * The map is kept at the end physical page range that has already been
- * registered with add_active_range(). This function allows an arch to shrink
- * an existing registered range.
+ * The map is kept near the end physical page range that has already been
+ * registered. This function allows an arch to shrink an existing registered
+ * range.
  */
-void __init shrink_active_range(unsigned int nid, unsigned long old_end_pfn,
-						unsigned long new_end_pfn)
+void __init remove_active_range(unsigned int nid, unsigned long start_pfn,
+				unsigned long end_pfn)
 {
-	int i;
+	int i, j;
+	int removed = 0;
+
+	printk(KERN_DEBUG "remove_active_range (%d, %lu, %lu)\n",
+			  nid, start_pfn, end_pfn);
 
 	/* Find the old active region end and shrink */
-	for_each_active_range_index_in_nid(i, nid)
-		if (early_node_map[i].end_pfn == old_end_pfn) {
-			early_node_map[i].end_pfn = new_end_pfn;
-			break;
+	for_each_active_range_index_in_nid(i, nid) {
+		if (early_node_map[i].start_pfn >= start_pfn &&
+		    early_node_map[i].end_pfn <= end_pfn) {
+			/* clear it */
+			early_node_map[i].start_pfn = 0;
+			early_node_map[i].end_pfn = 0;
+			removed = 1;
+			continue;
+		}
+		if (early_node_map[i].start_pfn < start_pfn &&
+		    early_node_map[i].end_pfn > start_pfn) {
+			unsigned long temp_end_pfn = early_node_map[i].end_pfn;
+			early_node_map[i].end_pfn = start_pfn;
+			if (temp_end_pfn > end_pfn)
+				add_active_range(nid, end_pfn, temp_end_pfn);
+			continue;
 		}
+		if (early_node_map[i].start_pfn >= start_pfn &&
+		    early_node_map[i].end_pfn > end_pfn &&
+		    early_node_map[i].start_pfn < end_pfn) {
+			early_node_map[i].start_pfn = end_pfn;
+			continue;
+		}
+	}
+
+	if (!removed)
+		return;
+
+	/* remove the blank ones */
+	for (i = nr_nodemap_entries - 1; i > 0; i--) {
+		if (early_node_map[i].nid != nid)
+			continue;
+		if (early_node_map[i].end_pfn)
+			continue;
+		/* we found it, get rid of it */
+		for (j = i; j < nr_nodemap_entries - 1; j++)
+			memcpy(&early_node_map[j], &early_node_map[j+1],
+				sizeof(early_node_map[j]));
+		j = nr_nodemap_entries - 1;
+		memset(&early_node_map[j], 0, sizeof(early_node_map[j]));
+		nr_nodemap_entries--;
+	}
 }
 
 /**
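
Note the semantic widening above: shrink_active_range() could only pull in an existing range's end, while remove_active_range() carves an arbitrary [start_pfn, end_pfn) hole. Ranges wholly inside the hole are zeroed and compacted away by the second loop; a range straddling the hole's start is truncated at start_pfn, with any tail beyond end_pfn re-registered via add_active_range(); a range straddling only the hole's end has its start bumped to end_pfn. For example, removing [40, 60) from a registered [10, 100) leaves [10, 40) plus a re-added [60, 100).
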
@@ -3611,7 +3669,7 @@ static void __init sort_node_map(void)
 }
 
 /* Find the lowest pfn for a node */
-unsigned long __init find_min_pfn_for_node(unsigned long nid)
+unsigned long __init find_min_pfn_for_node(int nid)
 {
 	int i;
 	unsigned long min_pfn = ULONG_MAX;
@@ -3622,7 +3680,7 @@ unsigned long __init find_min_pfn_for_node(unsigned long nid)
 
 	if (min_pfn == ULONG_MAX) {
 		printk(KERN_WARNING
-			"Could not find start_pfn for node %lu\n", nid);
+			"Could not find start_pfn for node %d\n", nid);
 		return 0;
 	}
 
@@ -3878,7 +3936,7 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn)
 	for (i = 0; i < MAX_NR_ZONES; i++) {
 		if (i == ZONE_MOVABLE)
 			continue;
-		printk("  %-8s %8lu -> %8lu\n",
+		printk("  %-8s %0#10lx -> %0#10lx\n",
 				zone_names[i],
 				arch_zone_lowest_possible_pfn[i],
 				arch_zone_highest_possible_pfn[i]);
@@ -3894,7 +3952,7 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn)
 	/* Print out the early_node_map[] */
 	printk("early_node_map[%d] active PFN ranges\n", nr_nodemap_entries);
 	for (i = 0; i < nr_nodemap_entries; i++)
-		printk("  %3d: %8lu -> %8lu\n", early_node_map[i].nid,
+		printk("  %3d: %0#10lx -> %0#10lx\n", early_node_map[i].nid,
 						early_node_map[i].start_pfn,
 						early_node_map[i].end_pfn);
 
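
The format tweak from %8lu to %0#10lx prints PFNs as fixed-width, 0x-prefixed hex, so the boot-time zone and early_node_map dumps line up with e820 and other hex memory-map output; e.g. PFN 4096 is now rendered as 0x00001000 rather than 4096.
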
diff --git a/sound/oss/vwsnd.c b/sound/oss/vwsnd.c
index 2c5aaa58046d..dcbb3f739e61 100644
--- a/sound/oss/vwsnd.c
+++ b/sound/oss/vwsnd.c
@@ -150,7 +150,7 @@
 #include <linux/interrupt.h>
 #include <linux/mutex.h>
 
-#include <asm/mach-visws/cobalt.h>
+#include <asm/visws/cobalt.h>
 
 #include "sound_config.h"
 