author    Jiri Kosina <jkosina@suse.cz>  2011-02-15 04:24:31 -0500
committer Jiri Kosina <jkosina@suse.cz>  2011-02-15 04:24:31 -0500
commit    0a9d59a2461477bd9ed143c01af9df3f8f00fa81 (patch)
tree      df997d1cfb0786427a0df1fbd6f0640fa4248cf4 /arch/x86
parent    a23ce6da9677d245aa0aadc99f4197030350ab54 (diff)
parent    795abaf1e4e188c4171e3cd3dbb11a9fcacaf505 (diff)
Merge branch 'master' into for-next
Diffstat (limited to 'arch/x86')
64 files changed, 1262 insertions, 666 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 36ed2e2c896..d5ed94d30aa 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -627,11 +627,11 @@ config APB_TIMER | |||
627 | as it is off-chip. APB timers are always running regardless of CPU | 627 | as it is off-chip. APB timers are always running regardless of CPU |
628 | C states, they are used as per CPU clockevent device when possible. | 628 | C states, they are used as per CPU clockevent device when possible. |
629 | 629 | ||
630 | # Mark as embedded because too many people got it wrong. | 630 | # Mark as expert because too many people got it wrong. |
631 | # The code disables itself when not needed. | 631 | # The code disables itself when not needed. |
632 | config DMI | 632 | config DMI |
633 | default y | 633 | default y |
634 | bool "Enable DMI scanning" if EMBEDDED | 634 | bool "Enable DMI scanning" if EXPERT |
635 | ---help--- | 635 | ---help--- |
636 | Enabled scanning of DMI to identify machine quirks. Say Y | 636 | Enabled scanning of DMI to identify machine quirks. Say Y |
637 | here unless you have verified that your setup is not | 637 | here unless you have verified that your setup is not |
@@ -639,7 +639,7 @@ config DMI | |||
639 | BIOS code. | 639 | BIOS code. |
640 | 640 | ||
641 | config GART_IOMMU | 641 | config GART_IOMMU |
642 | bool "GART IOMMU support" if EMBEDDED | 642 | bool "GART IOMMU support" if EXPERT |
643 | default y | 643 | default y |
644 | select SWIOTLB | 644 | select SWIOTLB |
645 | depends on X86_64 && PCI && AMD_NB | 645 | depends on X86_64 && PCI && AMD_NB |
@@ -889,7 +889,7 @@ config X86_THERMAL_VECTOR | |||
889 | depends on X86_MCE_INTEL | 889 | depends on X86_MCE_INTEL |
890 | 890 | ||
891 | config VM86 | 891 | config VM86 |
892 | bool "Enable VM86 support" if EMBEDDED | 892 | bool "Enable VM86 support" if EXPERT |
893 | default y | 893 | default y |
894 | depends on X86_32 | 894 | depends on X86_32 |
895 | ---help--- | 895 | ---help--- |
@@ -1073,7 +1073,7 @@ endchoice | |||
1073 | 1073 | ||
1074 | choice | 1074 | choice |
1075 | depends on EXPERIMENTAL | 1075 | depends on EXPERIMENTAL |
1076 | prompt "Memory split" if EMBEDDED | 1076 | prompt "Memory split" if EXPERT |
1077 | default VMSPLIT_3G | 1077 | default VMSPLIT_3G |
1078 | depends on X86_32 | 1078 | depends on X86_32 |
1079 | ---help--- | 1079 | ---help--- |
@@ -1135,7 +1135,7 @@ config ARCH_DMA_ADDR_T_64BIT | |||
1135 | def_bool X86_64 || HIGHMEM64G | 1135 | def_bool X86_64 || HIGHMEM64G |
1136 | 1136 | ||
1137 | config DIRECT_GBPAGES | 1137 | config DIRECT_GBPAGES |
1138 | bool "Enable 1GB pages for kernel pagetables" if EMBEDDED | 1138 | bool "Enable 1GB pages for kernel pagetables" if EXPERT |
1139 | default y | 1139 | default y |
1140 | depends on X86_64 | 1140 | depends on X86_64 |
1141 | ---help--- | 1141 | ---help--- |
@@ -1369,7 +1369,7 @@ config MATH_EMULATION | |||
1369 | 1369 | ||
1370 | config MTRR | 1370 | config MTRR |
1371 | def_bool y | 1371 | def_bool y |
1372 | prompt "MTRR (Memory Type Range Register) support" if EMBEDDED | 1372 | prompt "MTRR (Memory Type Range Register) support" if EXPERT |
1373 | ---help--- | 1373 | ---help--- |
1374 | On Intel P6 family processors (Pentium Pro, Pentium II and later) | 1374 | On Intel P6 family processors (Pentium Pro, Pentium II and later) |
1375 | the Memory Type Range Registers (MTRRs) may be used to control | 1375 | the Memory Type Range Registers (MTRRs) may be used to control |
@@ -1435,7 +1435,7 @@ config MTRR_SANITIZER_SPARE_REG_NR_DEFAULT | |||
1435 | 1435 | ||
1436 | config X86_PAT | 1436 | config X86_PAT |
1437 | def_bool y | 1437 | def_bool y |
1438 | prompt "x86 PAT support" if EMBEDDED | 1438 | prompt "x86 PAT support" if EXPERT |
1439 | depends on MTRR | 1439 | depends on MTRR |
1440 | ---help--- | 1440 | ---help--- |
1441 | Use PAT attributes to setup page level cache control. | 1441 | Use PAT attributes to setup page level cache control. |
@@ -1539,7 +1539,7 @@ config KEXEC_JUMP | |||
1539 | code in physical address mode via KEXEC | 1539 | code in physical address mode via KEXEC |
1540 | 1540 | ||
1541 | config PHYSICAL_START | 1541 | config PHYSICAL_START |
1542 | hex "Physical address where the kernel is loaded" if (EMBEDDED || CRASH_DUMP) | 1542 | hex "Physical address where the kernel is loaded" if (EXPERT || CRASH_DUMP) |
1543 | default "0x1000000" | 1543 | default "0x1000000" |
1544 | ---help--- | 1544 | ---help--- |
1545 | This gives the physical address where the kernel is loaded. | 1545 | This gives the physical address where the kernel is loaded. |
@@ -1934,13 +1934,19 @@ config PCI_MMCONFIG | |||
1934 | depends on X86_64 && PCI && ACPI | 1934 | depends on X86_64 && PCI && ACPI |
1935 | 1935 | ||
1936 | config PCI_CNB20LE_QUIRK | 1936 | config PCI_CNB20LE_QUIRK |
1937 | bool "Read CNB20LE Host Bridge Windows" | 1937 | bool "Read CNB20LE Host Bridge Windows" if EXPERT |
1938 | depends on PCI | 1938 | default n |
1939 | depends on PCI && EXPERIMENTAL | ||
1939 | help | 1940 | help |
1940 | Read the PCI windows out of the CNB20LE host bridge. This allows | 1941 | Read the PCI windows out of the CNB20LE host bridge. This allows |
1941 | PCI hotplug to work on systems with the CNB20LE chipset which do | 1942 | PCI hotplug to work on systems with the CNB20LE chipset which do |
1942 | not have ACPI. | 1943 | not have ACPI. |
1943 | 1944 | ||
1945 | There's no public spec for this chipset, and this functionality | ||
1946 | is known to be incomplete. | ||
1947 | |||
1948 | You should say N unless you know you need this. | ||
1949 | |||
1944 | config DMAR | 1950 | config DMAR |
1945 | bool "Support for DMA Remapping Devices (EXPERIMENTAL)" | 1951 | bool "Support for DMA Remapping Devices (EXPERIMENTAL)" |
1946 | depends on PCI_MSI && ACPI && EXPERIMENTAL | 1952 | depends on PCI_MSI && ACPI && EXPERIMENTAL |
@@ -2062,13 +2068,14 @@ config OLPC | |||
2062 | bool "One Laptop Per Child support" | 2068 | bool "One Laptop Per Child support" |
2063 | select GPIOLIB | 2069 | select GPIOLIB |
2064 | select OLPC_OPENFIRMWARE | 2070 | select OLPC_OPENFIRMWARE |
2071 | depends on !X86_64 && !X86_PAE | ||
2065 | ---help--- | 2072 | ---help--- |
2066 | Add support for detecting the unique features of the OLPC | 2073 | Add support for detecting the unique features of the OLPC |
2067 | XO hardware. | 2074 | XO hardware. |
2068 | 2075 | ||
2069 | config OLPC_XO1 | 2076 | config OLPC_XO1 |
2070 | tristate "OLPC XO-1 support" | 2077 | tristate "OLPC XO-1 support" |
2071 | depends on OLPC && PCI | 2078 | depends on OLPC && MFD_CS5535 |
2072 | ---help--- | 2079 | ---help--- |
2073 | Add support for non-essential features of the OLPC XO-1 laptop. | 2080 | Add support for non-essential features of the OLPC XO-1 laptop. |
2074 | 2081 | ||
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 15588a0ef46..283c5a6a03a 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -424,7 +424,7 @@ config X86_DEBUGCTLMSR | |||
424 | depends on !(MK6 || MWINCHIPC6 || MWINCHIP3D || MCYRIXIII || M586MMX || M586TSC || M586 || M486 || M386) && !UML | 424 | depends on !(MK6 || MWINCHIPC6 || MWINCHIP3D || MCYRIXIII || M586MMX || M586TSC || M586 || M486 || M386) && !UML |
425 | 425 | ||
426 | menuconfig PROCESSOR_SELECT | 426 | menuconfig PROCESSOR_SELECT |
427 | bool "Supported processor vendors" if EMBEDDED | 427 | bool "Supported processor vendors" if EXPERT |
428 | ---help--- | 428 | ---help--- |
429 | This lets you choose what x86 vendor support code your kernel | 429 | This lets you choose what x86 vendor support code your kernel |
430 | will include. | 430 | will include. |
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 45143bbcfe5..615e18810f4 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -31,7 +31,7 @@ config X86_VERBOSE_BOOTUP | |||
31 | see errors. Disable this if you want silent bootup. | 31 | see errors. Disable this if you want silent bootup. |
32 | 32 | ||
33 | config EARLY_PRINTK | 33 | config EARLY_PRINTK |
34 | bool "Early printk" if EMBEDDED | 34 | bool "Early printk" if EXPERT |
35 | default y | 35 | default y |
36 | ---help--- | 36 | ---help--- |
37 | Write kernel log output directly into the VGA buffer or to a serial | 37 | Write kernel log output directly into the VGA buffer or to a serial |
@@ -138,7 +138,7 @@ config DEBUG_NX_TEST | |||
138 | 138 | ||
139 | config DOUBLEFAULT | 139 | config DOUBLEFAULT |
140 | default y | 140 | default y |
141 | bool "Enable doublefault exception handler" if EMBEDDED | 141 | bool "Enable doublefault exception handler" if EXPERT |
142 | depends on X86_32 | 142 | depends on X86_32 |
143 | ---help--- | 143 | ---help--- |
144 | This option allows trapping of rare doublefault exceptions that | 144 | This option allows trapping of rare doublefault exceptions that |
diff --git a/arch/x86/include/asm/cacheflush.h b/arch/x86/include/asm/cacheflush.h
index 63e35ec9075..62f084478f7 100644
--- a/arch/x86/include/asm/cacheflush.h
+++ b/arch/x86/include/asm/cacheflush.h
@@ -1,48 +1,8 @@ | |||
1 | #ifndef _ASM_X86_CACHEFLUSH_H | 1 | #ifndef _ASM_X86_CACHEFLUSH_H |
2 | #define _ASM_X86_CACHEFLUSH_H | 2 | #define _ASM_X86_CACHEFLUSH_H |
3 | 3 | ||
4 | /* Keep includes the same across arches. */ | ||
5 | #include <linux/mm.h> | ||
6 | |||
7 | /* Caches aren't brain-dead on the intel. */ | 4 | /* Caches aren't brain-dead on the intel. */ |
8 | static inline void flush_cache_all(void) { } | 5 | #include <asm-generic/cacheflush.h> |
9 | static inline void flush_cache_mm(struct mm_struct *mm) { } | ||
10 | static inline void flush_cache_dup_mm(struct mm_struct *mm) { } | ||
11 | static inline void flush_cache_range(struct vm_area_struct *vma, | ||
12 | unsigned long start, unsigned long end) { } | ||
13 | static inline void flush_cache_page(struct vm_area_struct *vma, | ||
14 | unsigned long vmaddr, unsigned long pfn) { } | ||
15 | #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0 | ||
16 | static inline void flush_dcache_page(struct page *page) { } | ||
17 | static inline void flush_dcache_mmap_lock(struct address_space *mapping) { } | ||
18 | static inline void flush_dcache_mmap_unlock(struct address_space *mapping) { } | ||
19 | static inline void flush_icache_range(unsigned long start, | ||
20 | unsigned long end) { } | ||
21 | static inline void flush_icache_page(struct vm_area_struct *vma, | ||
22 | struct page *page) { } | ||
23 | static inline void flush_icache_user_range(struct vm_area_struct *vma, | ||
24 | struct page *page, | ||
25 | unsigned long addr, | ||
26 | unsigned long len) { } | ||
27 | static inline void flush_cache_vmap(unsigned long start, unsigned long end) { } | ||
28 | static inline void flush_cache_vunmap(unsigned long start, | ||
29 | unsigned long end) { } | ||
30 | |||
31 | static inline void copy_to_user_page(struct vm_area_struct *vma, | ||
32 | struct page *page, unsigned long vaddr, | ||
33 | void *dst, const void *src, | ||
34 | unsigned long len) | ||
35 | { | ||
36 | memcpy(dst, src, len); | ||
37 | } | ||
38 | |||
39 | static inline void copy_from_user_page(struct vm_area_struct *vma, | ||
40 | struct page *page, unsigned long vaddr, | ||
41 | void *dst, const void *src, | ||
42 | unsigned long len) | ||
43 | { | ||
44 | memcpy(dst, src, len); | ||
45 | } | ||
46 | 6 | ||
47 | #ifdef CONFIG_X86_PAT | 7 | #ifdef CONFIG_X86_PAT |
48 | /* | 8 | /* |
diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h
index 4fab24de26b..6e6e7558e70 100644
--- a/arch/x86/include/asm/cpu.h
+++ b/arch/x86/include/asm/cpu.h
@@ -32,5 +32,6 @@ extern void arch_unregister_cpu(int); | |||
32 | 32 | ||
33 | DECLARE_PER_CPU(int, cpu_state); | 33 | DECLARE_PER_CPU(int, cpu_state); |
34 | 34 | ||
35 | int __cpuinit mwait_usable(const struct cpuinfo_x86 *); | ||
35 | 36 | ||
36 | #endif /* _ASM_X86_CPU_H */ | 37 | #endif /* _ASM_X86_CPU_H */ |
diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h
index f52d42e8058..574dbc22893 100644
--- a/arch/x86/include/asm/jump_label.h
+++ b/arch/x86/include/asm/jump_label.h
@@ -14,7 +14,7 @@ | |||
14 | do { \ | 14 | do { \ |
15 | asm goto("1:" \ | 15 | asm goto("1:" \ |
16 | JUMP_LABEL_INITIAL_NOP \ | 16 | JUMP_LABEL_INITIAL_NOP \ |
17 | ".pushsection __jump_table, \"a\" \n\t"\ | 17 | ".pushsection __jump_table, \"aw\" \n\t"\ |
18 | _ASM_PTR "1b, %l[" #label "], %c0 \n\t" \ | 18 | _ASM_PTR "1b, %l[" #label "], %c0 \n\t" \ |
19 | ".popsection \n\t" \ | 19 | ".popsection \n\t" \ |
20 | : : "i" (key) : : label); \ | 20 | : : "i" (key) : : label); \ |
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index aa75f21a9fb..ffd7f8d2918 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -822,6 +822,7 @@ extern bool kvm_rebooting; | |||
822 | #define KVM_ARCH_WANT_MMU_NOTIFIER | 822 | #define KVM_ARCH_WANT_MMU_NOTIFIER |
823 | int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); | 823 | int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); |
824 | int kvm_age_hva(struct kvm *kvm, unsigned long hva); | 824 | int kvm_age_hva(struct kvm *kvm, unsigned long hva); |
825 | int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); | ||
825 | void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); | 826 | void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); |
826 | int cpuid_maxphyaddr(struct kvm_vcpu *vcpu); | 827 | int cpuid_maxphyaddr(struct kvm_vcpu *vcpu); |
827 | int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu); | 828 | int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu); |
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 4a2d4e0c18d..8b5393ec108 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -36,8 +36,6 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, | |||
36 | unsigned cpu = smp_processor_id(); | 36 | unsigned cpu = smp_processor_id(); |
37 | 37 | ||
38 | if (likely(prev != next)) { | 38 | if (likely(prev != next)) { |
39 | /* stop flush ipis for the previous mm */ | ||
40 | cpumask_clear_cpu(cpu, mm_cpumask(prev)); | ||
41 | #ifdef CONFIG_SMP | 39 | #ifdef CONFIG_SMP |
42 | percpu_write(cpu_tlbstate.state, TLBSTATE_OK); | 40 | percpu_write(cpu_tlbstate.state, TLBSTATE_OK); |
43 | percpu_write(cpu_tlbstate.active_mm, next); | 41 | percpu_write(cpu_tlbstate.active_mm, next); |
@@ -47,6 +45,9 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, | |||
47 | /* Re-load page tables */ | 45 | /* Re-load page tables */ |
48 | load_cr3(next->pgd); | 46 | load_cr3(next->pgd); |
49 | 47 | ||
48 | /* stop flush ipis for the previous mm */ | ||
49 | cpumask_clear_cpu(cpu, mm_cpumask(prev)); | ||
50 | |||
50 | /* | 51 | /* |
51 | * load the LDT, if the LDT is different: | 52 | * load the LDT, if the LDT is different: |
52 | */ | 53 | */ |
diff --git a/arch/x86/include/asm/numa_32.h b/arch/x86/include/asm/numa_32.h
index a37229011b5..b0ef2b449a9 100644
--- a/arch/x86/include/asm/numa_32.h
+++ b/arch/x86/include/asm/numa_32.h
@@ -1,6 +1,8 @@ | |||
1 | #ifndef _ASM_X86_NUMA_32_H | 1 | #ifndef _ASM_X86_NUMA_32_H |
2 | #define _ASM_X86_NUMA_32_H | 2 | #define _ASM_X86_NUMA_32_H |
3 | 3 | ||
4 | extern int numa_off; | ||
5 | |||
4 | extern int pxm_to_nid(int pxm); | 6 | extern int pxm_to_nid(int pxm); |
5 | extern void numa_remove_cpu(int cpu); | 7 | extern void numa_remove_cpu(int cpu); |
6 | 8 | ||
diff --git a/arch/x86/include/asm/numa_64.h b/arch/x86/include/asm/numa_64.h
index 5ae87285a50..0493be39607 100644
--- a/arch/x86/include/asm/numa_64.h
+++ b/arch/x86/include/asm/numa_64.h
@@ -40,6 +40,7 @@ extern void __cpuinit numa_remove_cpu(int cpu); | |||
40 | #ifdef CONFIG_NUMA_EMU | 40 | #ifdef CONFIG_NUMA_EMU |
41 | #define FAKE_NODE_MIN_SIZE ((u64)32 << 20) | 41 | #define FAKE_NODE_MIN_SIZE ((u64)32 << 20) |
42 | #define FAKE_NODE_MIN_HASH_MASK (~(FAKE_NODE_MIN_SIZE - 1UL)) | 42 | #define FAKE_NODE_MIN_HASH_MASK (~(FAKE_NODE_MIN_SIZE - 1UL)) |
43 | void numa_emu_cmdline(char *); | ||
43 | #endif /* CONFIG_NUMA_EMU */ | 44 | #endif /* CONFIG_NUMA_EMU */ |
44 | #else | 45 | #else |
45 | static inline void init_cpu_to_node(void) { } | 46 | static inline void init_cpu_to_node(void) { } |
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 7709c12431b..ebbc4d8ab17 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -435,6 +435,11 @@ static inline void pte_update(struct mm_struct *mm, unsigned long addr, | |||
435 | { | 435 | { |
436 | PVOP_VCALL3(pv_mmu_ops.pte_update, mm, addr, ptep); | 436 | PVOP_VCALL3(pv_mmu_ops.pte_update, mm, addr, ptep); |
437 | } | 437 | } |
438 | static inline void pmd_update(struct mm_struct *mm, unsigned long addr, | ||
439 | pmd_t *pmdp) | ||
440 | { | ||
441 | PVOP_VCALL3(pv_mmu_ops.pmd_update, mm, addr, pmdp); | ||
442 | } | ||
438 | 443 | ||
439 | static inline void pte_update_defer(struct mm_struct *mm, unsigned long addr, | 444 | static inline void pte_update_defer(struct mm_struct *mm, unsigned long addr, |
440 | pte_t *ptep) | 445 | pte_t *ptep) |
@@ -442,6 +447,12 @@ static inline void pte_update_defer(struct mm_struct *mm, unsigned long addr, | |||
442 | PVOP_VCALL3(pv_mmu_ops.pte_update_defer, mm, addr, ptep); | 447 | PVOP_VCALL3(pv_mmu_ops.pte_update_defer, mm, addr, ptep); |
443 | } | 448 | } |
444 | 449 | ||
450 | static inline void pmd_update_defer(struct mm_struct *mm, unsigned long addr, | ||
451 | pmd_t *pmdp) | ||
452 | { | ||
453 | PVOP_VCALL3(pv_mmu_ops.pmd_update_defer, mm, addr, pmdp); | ||
454 | } | ||
455 | |||
445 | static inline pte_t __pte(pteval_t val) | 456 | static inline pte_t __pte(pteval_t val) |
446 | { | 457 | { |
447 | pteval_t ret; | 458 | pteval_t ret; |
@@ -543,6 +554,19 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, | |||
543 | PVOP_VCALL4(pv_mmu_ops.set_pte_at, mm, addr, ptep, pte.pte); | 554 | PVOP_VCALL4(pv_mmu_ops.set_pte_at, mm, addr, ptep, pte.pte); |
544 | } | 555 | } |
545 | 556 | ||
557 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | ||
558 | static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, | ||
559 | pmd_t *pmdp, pmd_t pmd) | ||
560 | { | ||
561 | if (sizeof(pmdval_t) > sizeof(long)) | ||
562 | /* 5 arg words */ | ||
563 | pv_mmu_ops.set_pmd_at(mm, addr, pmdp, pmd); | ||
564 | else | ||
565 | PVOP_VCALL4(pv_mmu_ops.set_pmd_at, mm, addr, pmdp, | ||
566 | native_pmd_val(pmd)); | ||
567 | } | ||
568 | #endif | ||
569 | |||
546 | static inline void set_pmd(pmd_t *pmdp, pmd_t pmd) | 570 | static inline void set_pmd(pmd_t *pmdp, pmd_t pmd) |
547 | { | 571 | { |
548 | pmdval_t val = native_pmd_val(pmd); | 572 | pmdval_t val = native_pmd_val(pmd); |
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index b82bac97525..82885099c86 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -265,10 +265,16 @@ struct pv_mmu_ops { | |||
265 | void (*set_pte_at)(struct mm_struct *mm, unsigned long addr, | 265 | void (*set_pte_at)(struct mm_struct *mm, unsigned long addr, |
266 | pte_t *ptep, pte_t pteval); | 266 | pte_t *ptep, pte_t pteval); |
267 | void (*set_pmd)(pmd_t *pmdp, pmd_t pmdval); | 267 | void (*set_pmd)(pmd_t *pmdp, pmd_t pmdval); |
268 | void (*set_pmd_at)(struct mm_struct *mm, unsigned long addr, | ||
269 | pmd_t *pmdp, pmd_t pmdval); | ||
268 | void (*pte_update)(struct mm_struct *mm, unsigned long addr, | 270 | void (*pte_update)(struct mm_struct *mm, unsigned long addr, |
269 | pte_t *ptep); | 271 | pte_t *ptep); |
270 | void (*pte_update_defer)(struct mm_struct *mm, | 272 | void (*pte_update_defer)(struct mm_struct *mm, |
271 | unsigned long addr, pte_t *ptep); | 273 | unsigned long addr, pte_t *ptep); |
274 | void (*pmd_update)(struct mm_struct *mm, unsigned long addr, | ||
275 | pmd_t *pmdp); | ||
276 | void (*pmd_update_defer)(struct mm_struct *mm, | ||
277 | unsigned long addr, pmd_t *pmdp); | ||
272 | 278 | ||
273 | pte_t (*ptep_modify_prot_start)(struct mm_struct *mm, unsigned long addr, | 279 | pte_t (*ptep_modify_prot_start)(struct mm_struct *mm, unsigned long addr, |
274 | pte_t *ptep); | 280 | pte_t *ptep); |
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 8ee45167e81..7e172955ee5 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -273,34 +273,34 @@ do { \ | |||
273 | typeof(var) pxo_new__ = (nval); \ | 273 | typeof(var) pxo_new__ = (nval); \ |
274 | switch (sizeof(var)) { \ | 274 | switch (sizeof(var)) { \ |
275 | case 1: \ | 275 | case 1: \ |
276 | asm("\n1:mov "__percpu_arg(1)",%%al" \ | 276 | asm("\n\tmov "__percpu_arg(1)",%%al" \ |
277 | "\n\tcmpxchgb %2, "__percpu_arg(1) \ | 277 | "\n1:\tcmpxchgb %2, "__percpu_arg(1) \ |
278 | "\n\tjnz 1b" \ | 278 | "\n\tjnz 1b" \ |
279 | : "=a" (pxo_ret__), "+m" (var) \ | 279 | : "=&a" (pxo_ret__), "+m" (var) \ |
280 | : "q" (pxo_new__) \ | 280 | : "q" (pxo_new__) \ |
281 | : "memory"); \ | 281 | : "memory"); \ |
282 | break; \ | 282 | break; \ |
283 | case 2: \ | 283 | case 2: \ |
284 | asm("\n1:mov "__percpu_arg(1)",%%ax" \ | 284 | asm("\n\tmov "__percpu_arg(1)",%%ax" \ |
285 | "\n\tcmpxchgw %2, "__percpu_arg(1) \ | 285 | "\n1:\tcmpxchgw %2, "__percpu_arg(1) \ |
286 | "\n\tjnz 1b" \ | 286 | "\n\tjnz 1b" \ |
287 | : "=a" (pxo_ret__), "+m" (var) \ | 287 | : "=&a" (pxo_ret__), "+m" (var) \ |
288 | : "r" (pxo_new__) \ | 288 | : "r" (pxo_new__) \ |
289 | : "memory"); \ | 289 | : "memory"); \ |
290 | break; \ | 290 | break; \ |
291 | case 4: \ | 291 | case 4: \ |
292 | asm("\n1:mov "__percpu_arg(1)",%%eax" \ | 292 | asm("\n\tmov "__percpu_arg(1)",%%eax" \ |
293 | "\n\tcmpxchgl %2, "__percpu_arg(1) \ | 293 | "\n1:\tcmpxchgl %2, "__percpu_arg(1) \ |
294 | "\n\tjnz 1b" \ | 294 | "\n\tjnz 1b" \ |
295 | : "=a" (pxo_ret__), "+m" (var) \ | 295 | : "=&a" (pxo_ret__), "+m" (var) \ |
296 | : "r" (pxo_new__) \ | 296 | : "r" (pxo_new__) \ |
297 | : "memory"); \ | 297 | : "memory"); \ |
298 | break; \ | 298 | break; \ |
299 | case 8: \ | 299 | case 8: \ |
300 | asm("\n1:mov "__percpu_arg(1)",%%rax" \ | 300 | asm("\n\tmov "__percpu_arg(1)",%%rax" \ |
301 | "\n\tcmpxchgq %2, "__percpu_arg(1) \ | 301 | "\n1:\tcmpxchgq %2, "__percpu_arg(1) \ |
302 | "\n\tjnz 1b" \ | 302 | "\n\tjnz 1b" \ |
303 | : "=a" (pxo_ret__), "+m" (var) \ | 303 | : "=&a" (pxo_ret__), "+m" (var) \ |
304 | : "r" (pxo_new__) \ | 304 | : "r" (pxo_new__) \ |
305 | : "memory"); \ | 305 | : "memory"); \ |
306 | break; \ | 306 | break; \ |
@@ -414,8 +414,6 @@ do { \ | |||
414 | #define this_cpu_xchg_1(pcp, nval) percpu_xchg_op(pcp, nval) | 414 | #define this_cpu_xchg_1(pcp, nval) percpu_xchg_op(pcp, nval) |
415 | #define this_cpu_xchg_2(pcp, nval) percpu_xchg_op(pcp, nval) | 415 | #define this_cpu_xchg_2(pcp, nval) percpu_xchg_op(pcp, nval) |
416 | #define this_cpu_xchg_4(pcp, nval) percpu_xchg_op(pcp, nval) | 416 | #define this_cpu_xchg_4(pcp, nval) percpu_xchg_op(pcp, nval) |
417 | #define this_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval) | ||
418 | #define this_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) | ||
419 | 417 | ||
420 | #define irqsafe_cpu_add_1(pcp, val) percpu_add_op((pcp), val) | 418 | #define irqsafe_cpu_add_1(pcp, val) percpu_add_op((pcp), val) |
421 | #define irqsafe_cpu_add_2(pcp, val) percpu_add_op((pcp), val) | 419 | #define irqsafe_cpu_add_2(pcp, val) percpu_add_op((pcp), val) |
@@ -432,8 +430,6 @@ do { \ | |||
432 | #define irqsafe_cpu_xchg_1(pcp, nval) percpu_xchg_op(pcp, nval) | 430 | #define irqsafe_cpu_xchg_1(pcp, nval) percpu_xchg_op(pcp, nval) |
433 | #define irqsafe_cpu_xchg_2(pcp, nval) percpu_xchg_op(pcp, nval) | 431 | #define irqsafe_cpu_xchg_2(pcp, nval) percpu_xchg_op(pcp, nval) |
434 | #define irqsafe_cpu_xchg_4(pcp, nval) percpu_xchg_op(pcp, nval) | 432 | #define irqsafe_cpu_xchg_4(pcp, nval) percpu_xchg_op(pcp, nval) |
435 | #define irqsafe_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval) | ||
436 | #define irqsafe_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) | ||
437 | 433 | ||
438 | #ifndef CONFIG_M386 | 434 | #ifndef CONFIG_M386 |
439 | #define __this_cpu_add_return_1(pcp, val) percpu_add_return_op(pcp, val) | 435 | #define __this_cpu_add_return_1(pcp, val) percpu_add_return_op(pcp, val) |
@@ -475,11 +471,15 @@ do { \ | |||
475 | #define this_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val) | 471 | #define this_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val) |
476 | #define this_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val) | 472 | #define this_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val) |
477 | #define this_cpu_add_return_8(pcp, val) percpu_add_return_op(pcp, val) | 473 | #define this_cpu_add_return_8(pcp, val) percpu_add_return_op(pcp, val) |
474 | #define this_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval) | ||
475 | #define this_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) | ||
478 | 476 | ||
479 | #define irqsafe_cpu_add_8(pcp, val) percpu_add_op((pcp), val) | 477 | #define irqsafe_cpu_add_8(pcp, val) percpu_add_op((pcp), val) |
480 | #define irqsafe_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val) | 478 | #define irqsafe_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val) |
481 | #define irqsafe_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val) | 479 | #define irqsafe_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val) |
482 | #define irqsafe_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val) | 480 | #define irqsafe_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val) |
481 | #define irqsafe_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval) | ||
482 | #define irqsafe_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) | ||
483 | #endif | 483 | #endif |
484 | 484 | ||
485 | /* This is not atomic against other CPUs -- CPU preemption needs to be off */ | 485 | /* This is not atomic against other CPUs -- CPU preemption needs to be off */ |
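The percpu.h hunks above move the retry label in percpu_xchg_op from the initial load onto the cmpxchg instruction and mark the accumulator as an earlyclobber output. As a plain-C illustration of the same retry pattern (a sketch only, not the kernel macro; it assumes the generic cmpxchg() helper and a hypothetical xchg_sketch() wrapper):

	/*
	 * Sketch of the xchg-via-cmpxchg retry loop the asm implements.
	 * On failure, cmpxchg() returns the value currently in memory,
	 * so the loop can retry with that value and needs no extra
	 * reload -- which is why the asm label now sits on the cmpxchg
	 * rather than on the mov.
	 */
	static unsigned long xchg_sketch(unsigned long *var, unsigned long nval)
	{
		unsigned long old = *var;
		unsigned long cur;

		for (;;) {
			cur = cmpxchg(var, old, nval);	/* returns previous memory value */
			if (cur == old)
				break;			/* exchange succeeded */
			old = cur;			/* lost a race; retry with fresh value */
		}
		return old;
	}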
diff --git a/arch/x86/include/asm/pgtable-2level.h b/arch/x86/include/asm/pgtable-2level.h
index 2334982b339..98391db840c 100644
--- a/arch/x86/include/asm/pgtable-2level.h
+++ b/arch/x86/include/asm/pgtable-2level.h
@@ -46,6 +46,15 @@ static inline pte_t native_ptep_get_and_clear(pte_t *xp) | |||
46 | #define native_ptep_get_and_clear(xp) native_local_ptep_get_and_clear(xp) | 46 | #define native_ptep_get_and_clear(xp) native_local_ptep_get_and_clear(xp) |
47 | #endif | 47 | #endif |
48 | 48 | ||
49 | #ifdef CONFIG_SMP | ||
50 | static inline pmd_t native_pmdp_get_and_clear(pmd_t *xp) | ||
51 | { | ||
52 | return __pmd(xchg((pmdval_t *)xp, 0)); | ||
53 | } | ||
54 | #else | ||
55 | #define native_pmdp_get_and_clear(xp) native_local_pmdp_get_and_clear(xp) | ||
56 | #endif | ||
57 | |||
49 | /* | 58 | /* |
50 | * Bits _PAGE_BIT_PRESENT, _PAGE_BIT_FILE and _PAGE_BIT_PROTNONE are taken, | 59 | * Bits _PAGE_BIT_PRESENT, _PAGE_BIT_FILE and _PAGE_BIT_PROTNONE are taken, |
51 | * split up the 29 bits of offset into this range: | 60 | * split up the 29 bits of offset into this range: |
diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h
index 177b0165ea0..94b979d1b58 100644
--- a/arch/x86/include/asm/pgtable-3level.h
+++ b/arch/x86/include/asm/pgtable-3level.h
@@ -104,6 +104,29 @@ static inline pte_t native_ptep_get_and_clear(pte_t *ptep) | |||
104 | #define native_ptep_get_and_clear(xp) native_local_ptep_get_and_clear(xp) | 104 | #define native_ptep_get_and_clear(xp) native_local_ptep_get_and_clear(xp) |
105 | #endif | 105 | #endif |
106 | 106 | ||
107 | #ifdef CONFIG_SMP | ||
108 | union split_pmd { | ||
109 | struct { | ||
110 | u32 pmd_low; | ||
111 | u32 pmd_high; | ||
112 | }; | ||
113 | pmd_t pmd; | ||
114 | }; | ||
115 | static inline pmd_t native_pmdp_get_and_clear(pmd_t *pmdp) | ||
116 | { | ||
117 | union split_pmd res, *orig = (union split_pmd *)pmdp; | ||
118 | |||
119 | /* xchg acts as a barrier before setting of the high bits */ | ||
120 | res.pmd_low = xchg(&orig->pmd_low, 0); | ||
121 | res.pmd_high = orig->pmd_high; | ||
122 | orig->pmd_high = 0; | ||
123 | |||
124 | return res.pmd; | ||
125 | } | ||
126 | #else | ||
127 | #define native_pmdp_get_and_clear(xp) native_local_pmdp_get_and_clear(xp) | ||
128 | #endif | ||
129 | |||
107 | /* | 130 | /* |
108 | * Bits 0, 6 and 7 are taken in the low part of the pte, | 131 | * Bits 0, 6 and 7 are taken in the low part of the pte, |
109 | * put the 32 bits of offset into the high part. | 132 | * put the 32 bits of offset into the high part. |
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index ada823a13c7..18601c86fab 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -35,6 +35,7 @@ extern struct mm_struct *pgd_page_get_mm(struct page *page); | |||
35 | #else /* !CONFIG_PARAVIRT */ | 35 | #else /* !CONFIG_PARAVIRT */ |
36 | #define set_pte(ptep, pte) native_set_pte(ptep, pte) | 36 | #define set_pte(ptep, pte) native_set_pte(ptep, pte) |
37 | #define set_pte_at(mm, addr, ptep, pte) native_set_pte_at(mm, addr, ptep, pte) | 37 | #define set_pte_at(mm, addr, ptep, pte) native_set_pte_at(mm, addr, ptep, pte) |
38 | #define set_pmd_at(mm, addr, pmdp, pmd) native_set_pmd_at(mm, addr, pmdp, pmd) | ||
38 | 39 | ||
39 | #define set_pte_atomic(ptep, pte) \ | 40 | #define set_pte_atomic(ptep, pte) \ |
40 | native_set_pte_atomic(ptep, pte) | 41 | native_set_pte_atomic(ptep, pte) |
@@ -59,6 +60,8 @@ extern struct mm_struct *pgd_page_get_mm(struct page *page); | |||
59 | 60 | ||
60 | #define pte_update(mm, addr, ptep) do { } while (0) | 61 | #define pte_update(mm, addr, ptep) do { } while (0) |
61 | #define pte_update_defer(mm, addr, ptep) do { } while (0) | 62 | #define pte_update_defer(mm, addr, ptep) do { } while (0) |
63 | #define pmd_update(mm, addr, ptep) do { } while (0) | ||
64 | #define pmd_update_defer(mm, addr, ptep) do { } while (0) | ||
62 | 65 | ||
63 | #define pgd_val(x) native_pgd_val(x) | 66 | #define pgd_val(x) native_pgd_val(x) |
64 | #define __pgd(x) native_make_pgd(x) | 67 | #define __pgd(x) native_make_pgd(x) |
@@ -94,6 +97,11 @@ static inline int pte_young(pte_t pte) | |||
94 | return pte_flags(pte) & _PAGE_ACCESSED; | 97 | return pte_flags(pte) & _PAGE_ACCESSED; |
95 | } | 98 | } |
96 | 99 | ||
100 | static inline int pmd_young(pmd_t pmd) | ||
101 | { | ||
102 | return pmd_flags(pmd) & _PAGE_ACCESSED; | ||
103 | } | ||
104 | |||
97 | static inline int pte_write(pte_t pte) | 105 | static inline int pte_write(pte_t pte) |
98 | { | 106 | { |
99 | return pte_flags(pte) & _PAGE_RW; | 107 | return pte_flags(pte) & _PAGE_RW; |
@@ -142,6 +150,23 @@ static inline int pmd_large(pmd_t pte) | |||
142 | (_PAGE_PSE | _PAGE_PRESENT); | 150 | (_PAGE_PSE | _PAGE_PRESENT); |
143 | } | 151 | } |
144 | 152 | ||
153 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | ||
154 | static inline int pmd_trans_splitting(pmd_t pmd) | ||
155 | { | ||
156 | return pmd_val(pmd) & _PAGE_SPLITTING; | ||
157 | } | ||
158 | |||
159 | static inline int pmd_trans_huge(pmd_t pmd) | ||
160 | { | ||
161 | return pmd_val(pmd) & _PAGE_PSE; | ||
162 | } | ||
163 | |||
164 | static inline int has_transparent_hugepage(void) | ||
165 | { | ||
166 | return cpu_has_pse; | ||
167 | } | ||
168 | #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ | ||
169 | |||
145 | static inline pte_t pte_set_flags(pte_t pte, pteval_t set) | 170 | static inline pte_t pte_set_flags(pte_t pte, pteval_t set) |
146 | { | 171 | { |
147 | pteval_t v = native_pte_val(pte); | 172 | pteval_t v = native_pte_val(pte); |
@@ -216,6 +241,55 @@ static inline pte_t pte_mkspecial(pte_t pte) | |||
216 | return pte_set_flags(pte, _PAGE_SPECIAL); | 241 | return pte_set_flags(pte, _PAGE_SPECIAL); |
217 | } | 242 | } |
218 | 243 | ||
244 | static inline pmd_t pmd_set_flags(pmd_t pmd, pmdval_t set) | ||
245 | { | ||
246 | pmdval_t v = native_pmd_val(pmd); | ||
247 | |||
248 | return __pmd(v | set); | ||
249 | } | ||
250 | |||
251 | static inline pmd_t pmd_clear_flags(pmd_t pmd, pmdval_t clear) | ||
252 | { | ||
253 | pmdval_t v = native_pmd_val(pmd); | ||
254 | |||
255 | return __pmd(v & ~clear); | ||
256 | } | ||
257 | |||
258 | static inline pmd_t pmd_mkold(pmd_t pmd) | ||
259 | { | ||
260 | return pmd_clear_flags(pmd, _PAGE_ACCESSED); | ||
261 | } | ||
262 | |||
263 | static inline pmd_t pmd_wrprotect(pmd_t pmd) | ||
264 | { | ||
265 | return pmd_clear_flags(pmd, _PAGE_RW); | ||
266 | } | ||
267 | |||
268 | static inline pmd_t pmd_mkdirty(pmd_t pmd) | ||
269 | { | ||
270 | return pmd_set_flags(pmd, _PAGE_DIRTY); | ||
271 | } | ||
272 | |||
273 | static inline pmd_t pmd_mkhuge(pmd_t pmd) | ||
274 | { | ||
275 | return pmd_set_flags(pmd, _PAGE_PSE); | ||
276 | } | ||
277 | |||
278 | static inline pmd_t pmd_mkyoung(pmd_t pmd) | ||
279 | { | ||
280 | return pmd_set_flags(pmd, _PAGE_ACCESSED); | ||
281 | } | ||
282 | |||
283 | static inline pmd_t pmd_mkwrite(pmd_t pmd) | ||
284 | { | ||
285 | return pmd_set_flags(pmd, _PAGE_RW); | ||
286 | } | ||
287 | |||
288 | static inline pmd_t pmd_mknotpresent(pmd_t pmd) | ||
289 | { | ||
290 | return pmd_clear_flags(pmd, _PAGE_PRESENT); | ||
291 | } | ||
292 | |||
219 | /* | 293 | /* |
220 | * Mask out unsupported bits in a present pgprot. Non-present pgprots | 294 | * Mask out unsupported bits in a present pgprot. Non-present pgprots |
221 | * can use those bits for other purposes, so leave them be. | 295 | * can use those bits for other purposes, so leave them be. |
@@ -256,6 +330,16 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) | |||
256 | return __pte(val); | 330 | return __pte(val); |
257 | } | 331 | } |
258 | 332 | ||
333 | static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) | ||
334 | { | ||
335 | pmdval_t val = pmd_val(pmd); | ||
336 | |||
337 | val &= _HPAGE_CHG_MASK; | ||
338 | val |= massage_pgprot(newprot) & ~_HPAGE_CHG_MASK; | ||
339 | |||
340 | return __pmd(val); | ||
341 | } | ||
342 | |||
259 | /* mprotect needs to preserve PAT bits when updating vm_page_prot */ | 343 | /* mprotect needs to preserve PAT bits when updating vm_page_prot */ |
260 | #define pgprot_modify pgprot_modify | 344 | #define pgprot_modify pgprot_modify |
261 | static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) | 345 | static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) |
@@ -350,7 +434,7 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd) | |||
350 | * Currently stuck as a macro due to indirect forward reference to | 434 | * Currently stuck as a macro due to indirect forward reference to |
351 | * linux/mmzone.h's __section_mem_map_addr() definition: | 435 | * linux/mmzone.h's __section_mem_map_addr() definition: |
352 | */ | 436 | */ |
353 | #define pmd_page(pmd) pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT) | 437 | #define pmd_page(pmd) pfn_to_page((pmd_val(pmd) & PTE_PFN_MASK) >> PAGE_SHIFT) |
354 | 438 | ||
355 | /* | 439 | /* |
356 | * the pmd page can be thought of an array like this: pmd_t[PTRS_PER_PMD] | 440 | * the pmd page can be thought of an array like this: pmd_t[PTRS_PER_PMD] |
@@ -524,12 +608,26 @@ static inline pte_t native_local_ptep_get_and_clear(pte_t *ptep) | |||
524 | return res; | 608 | return res; |
525 | } | 609 | } |
526 | 610 | ||
611 | static inline pmd_t native_local_pmdp_get_and_clear(pmd_t *pmdp) | ||
612 | { | ||
613 | pmd_t res = *pmdp; | ||
614 | |||
615 | native_pmd_clear(pmdp); | ||
616 | return res; | ||
617 | } | ||
618 | |||
527 | static inline void native_set_pte_at(struct mm_struct *mm, unsigned long addr, | 619 | static inline void native_set_pte_at(struct mm_struct *mm, unsigned long addr, |
528 | pte_t *ptep , pte_t pte) | 620 | pte_t *ptep , pte_t pte) |
529 | { | 621 | { |
530 | native_set_pte(ptep, pte); | 622 | native_set_pte(ptep, pte); |
531 | } | 623 | } |
532 | 624 | ||
625 | static inline void native_set_pmd_at(struct mm_struct *mm, unsigned long addr, | ||
626 | pmd_t *pmdp , pmd_t pmd) | ||
627 | { | ||
628 | native_set_pmd(pmdp, pmd); | ||
629 | } | ||
630 | |||
533 | #ifndef CONFIG_PARAVIRT | 631 | #ifndef CONFIG_PARAVIRT |
534 | /* | 632 | /* |
535 | * Rules for using pte_update - it must be called after any PTE update which | 633 | * Rules for using pte_update - it must be called after any PTE update which |
@@ -607,6 +705,49 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, | |||
607 | 705 | ||
608 | #define flush_tlb_fix_spurious_fault(vma, address) | 706 | #define flush_tlb_fix_spurious_fault(vma, address) |
609 | 707 | ||
708 | #define mk_pmd(page, pgprot) pfn_pmd(page_to_pfn(page), (pgprot)) | ||
709 | |||
710 | #define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS | ||
711 | extern int pmdp_set_access_flags(struct vm_area_struct *vma, | ||
712 | unsigned long address, pmd_t *pmdp, | ||
713 | pmd_t entry, int dirty); | ||
714 | |||
715 | #define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG | ||
716 | extern int pmdp_test_and_clear_young(struct vm_area_struct *vma, | ||
717 | unsigned long addr, pmd_t *pmdp); | ||
718 | |||
719 | #define __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH | ||
720 | extern int pmdp_clear_flush_young(struct vm_area_struct *vma, | ||
721 | unsigned long address, pmd_t *pmdp); | ||
722 | |||
723 | |||
724 | #define __HAVE_ARCH_PMDP_SPLITTING_FLUSH | ||
725 | extern void pmdp_splitting_flush(struct vm_area_struct *vma, | ||
726 | unsigned long addr, pmd_t *pmdp); | ||
727 | |||
728 | #define __HAVE_ARCH_PMD_WRITE | ||
729 | static inline int pmd_write(pmd_t pmd) | ||
730 | { | ||
731 | return pmd_flags(pmd) & _PAGE_RW; | ||
732 | } | ||
733 | |||
734 | #define __HAVE_ARCH_PMDP_GET_AND_CLEAR | ||
735 | static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm, unsigned long addr, | ||
736 | pmd_t *pmdp) | ||
737 | { | ||
738 | pmd_t pmd = native_pmdp_get_and_clear(pmdp); | ||
739 | pmd_update(mm, addr, pmdp); | ||
740 | return pmd; | ||
741 | } | ||
742 | |||
743 | #define __HAVE_ARCH_PMDP_SET_WRPROTECT | ||
744 | static inline void pmdp_set_wrprotect(struct mm_struct *mm, | ||
745 | unsigned long addr, pmd_t *pmdp) | ||
746 | { | ||
747 | clear_bit(_PAGE_BIT_RW, (unsigned long *)pmdp); | ||
748 | pmd_update(mm, addr, pmdp); | ||
749 | } | ||
750 | |||
610 | /* | 751 | /* |
611 | * clone_pgd_range(pgd_t *dst, pgd_t *src, int count); | 752 | * clone_pgd_range(pgd_t *dst, pgd_t *src, int count); |
612 | * | 753 | * |
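The pgtable.h hunks above introduce mk_pmd() and the pmd flag helpers (pmd_mkhuge, pmd_wrprotect, pmd_mkold, ...) plus set_pmd_at() for transparent hugepage support. A hypothetical usage sketch of how these helpers compose; the function name and its parameters are placeholders, not code from this commit:

	/* Illustrative only: build a read-only, not-yet-accessed huge pmd
	 * from a page using the helpers added in this diff, then install
	 * it through the (paravirt-aware) set_pmd_at(). */
	static void install_huge_pmd_sketch(struct mm_struct *mm, unsigned long address,
					    pmd_t *pmdp, struct page *page, pgprot_t prot)
	{
		pmd_t entry = mk_pmd(page, prot);	/* pfn plus protection bits */

		entry = pmd_mkhuge(entry);		/* set _PAGE_PSE */
		entry = pmd_wrprotect(entry);		/* clear _PAGE_RW */
		entry = pmd_mkold(entry);		/* clear _PAGE_ACCESSED */
		set_pmd_at(mm, address, pmdp, entry);	/* store the new pmd */
	}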
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index f86da20347f..975f709e09a 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -59,6 +59,16 @@ static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte) | |||
59 | native_set_pte(ptep, pte); | 59 | native_set_pte(ptep, pte); |
60 | } | 60 | } |
61 | 61 | ||
62 | static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd) | ||
63 | { | ||
64 | *pmdp = pmd; | ||
65 | } | ||
66 | |||
67 | static inline void native_pmd_clear(pmd_t *pmd) | ||
68 | { | ||
69 | native_set_pmd(pmd, native_make_pmd(0)); | ||
70 | } | ||
71 | |||
62 | static inline pte_t native_ptep_get_and_clear(pte_t *xp) | 72 | static inline pte_t native_ptep_get_and_clear(pte_t *xp) |
63 | { | 73 | { |
64 | #ifdef CONFIG_SMP | 74 | #ifdef CONFIG_SMP |
@@ -72,14 +82,17 @@ static inline pte_t native_ptep_get_and_clear(pte_t *xp) | |||
72 | #endif | 82 | #endif |
73 | } | 83 | } |
74 | 84 | ||
75 | static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd) | 85 | static inline pmd_t native_pmdp_get_and_clear(pmd_t *xp) |
76 | { | 86 | { |
77 | *pmdp = pmd; | 87 | #ifdef CONFIG_SMP |
78 | } | 88 | return native_make_pmd(xchg(&xp->pmd, 0)); |
79 | 89 | #else | |
80 | static inline void native_pmd_clear(pmd_t *pmd) | 90 | /* native_local_pmdp_get_and_clear, |
81 | { | 91 | but duplicated because of cyclic dependency */ |
82 | native_set_pmd(pmd, native_make_pmd(0)); | 92 | pmd_t ret = *xp; |
93 | native_pmd_clear(xp); | ||
94 | return ret; | ||
95 | #endif | ||
83 | } | 96 | } |
84 | 97 | ||
85 | static inline void native_set_pud(pud_t *pudp, pud_t pud) | 98 | static inline void native_set_pud(pud_t *pudp, pud_t pud) |
@@ -168,6 +181,7 @@ extern void cleanup_highmap(void); | |||
168 | #define kc_offset_to_vaddr(o) ((o) | ~__VIRTUAL_MASK) | 181 | #define kc_offset_to_vaddr(o) ((o) | ~__VIRTUAL_MASK) |
169 | 182 | ||
170 | #define __HAVE_ARCH_PTE_SAME | 183 | #define __HAVE_ARCH_PTE_SAME |
184 | |||
171 | #endif /* !__ASSEMBLY__ */ | 185 | #endif /* !__ASSEMBLY__ */ |
172 | 186 | ||
173 | #endif /* _ASM_X86_PGTABLE_64_H */ | 187 | #endif /* _ASM_X86_PGTABLE_64_H */ |
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index d1f4a760be2..7db7723d1f3 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -22,6 +22,7 @@ | |||
22 | #define _PAGE_BIT_PAT_LARGE 12 /* On 2MB or 1GB pages */ | 22 | #define _PAGE_BIT_PAT_LARGE 12 /* On 2MB or 1GB pages */ |
23 | #define _PAGE_BIT_SPECIAL _PAGE_BIT_UNUSED1 | 23 | #define _PAGE_BIT_SPECIAL _PAGE_BIT_UNUSED1 |
24 | #define _PAGE_BIT_CPA_TEST _PAGE_BIT_UNUSED1 | 24 | #define _PAGE_BIT_CPA_TEST _PAGE_BIT_UNUSED1 |
25 | #define _PAGE_BIT_SPLITTING _PAGE_BIT_UNUSED1 /* only valid on a PSE pmd */ | ||
25 | #define _PAGE_BIT_NX 63 /* No execute: only valid after cpuid check */ | 26 | #define _PAGE_BIT_NX 63 /* No execute: only valid after cpuid check */ |
26 | 27 | ||
27 | /* If _PAGE_BIT_PRESENT is clear, we use these: */ | 28 | /* If _PAGE_BIT_PRESENT is clear, we use these: */ |
@@ -45,6 +46,7 @@ | |||
45 | #define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE) | 46 | #define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE) |
46 | #define _PAGE_SPECIAL (_AT(pteval_t, 1) << _PAGE_BIT_SPECIAL) | 47 | #define _PAGE_SPECIAL (_AT(pteval_t, 1) << _PAGE_BIT_SPECIAL) |
47 | #define _PAGE_CPA_TEST (_AT(pteval_t, 1) << _PAGE_BIT_CPA_TEST) | 48 | #define _PAGE_CPA_TEST (_AT(pteval_t, 1) << _PAGE_BIT_CPA_TEST) |
49 | #define _PAGE_SPLITTING (_AT(pteval_t, 1) << _PAGE_BIT_SPLITTING) | ||
48 | #define __HAVE_ARCH_PTE_SPECIAL | 50 | #define __HAVE_ARCH_PTE_SPECIAL |
49 | 51 | ||
50 | #ifdef CONFIG_KMEMCHECK | 52 | #ifdef CONFIG_KMEMCHECK |
@@ -70,6 +72,7 @@ | |||
70 | /* Set of bits not changed in pte_modify */ | 72 | /* Set of bits not changed in pte_modify */ |
71 | #define _PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT | \ | 73 | #define _PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT | \ |
72 | _PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY) | 74 | _PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY) |
75 | #define _HPAGE_CHG_MASK (_PAGE_CHG_MASK | _PAGE_PSE) | ||
73 | 76 | ||
74 | #define _PAGE_CACHE_MASK (_PAGE_PCD | _PAGE_PWT) | 77 | #define _PAGE_CACHE_MASK (_PAGE_PCD | _PAGE_PWT) |
75 | #define _PAGE_CACHE_WB (0) | 78 | #define _PAGE_CACHE_WB (0) |
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 53fd1d5a1fe..45636cefa18 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -761,10 +761,11 @@ extern void select_idle_routine(const struct cpuinfo_x86 *c); | |||
761 | extern void init_c1e_mask(void); | 761 | extern void init_c1e_mask(void); |
762 | 762 | ||
763 | extern unsigned long boot_option_idle_override; | 763 | extern unsigned long boot_option_idle_override; |
764 | extern unsigned long idle_halt; | ||
765 | extern unsigned long idle_nomwait; | ||
766 | extern bool c1e_detected; | 764 | extern bool c1e_detected; |
767 | 765 | ||
766 | enum idle_boot_override {IDLE_NO_OVERRIDE=0, IDLE_HALT, IDLE_NOMWAIT, | ||
767 | IDLE_POLL, IDLE_FORCE_MWAIT}; | ||
768 | |||
768 | extern void enable_sep_cpu(void); | 769 | extern void enable_sep_cpu(void); |
769 | extern int sysenter_setup(void); | 770 | extern int sysenter_setup(void); |
770 | 771 | ||
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 4c2f63c7fc1..1f469513677 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -40,10 +40,7 @@ DECLARE_EARLY_PER_CPU(u16, x86_cpu_to_apicid); | |||
40 | DECLARE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid); | 40 | DECLARE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid); |
41 | 41 | ||
42 | /* Static state in head.S used to set up a CPU */ | 42 | /* Static state in head.S used to set up a CPU */ |
43 | extern struct { | 43 | extern unsigned long stack_start; /* Initial stack pointer address */ |
44 | void *sp; | ||
45 | unsigned short ss; | ||
46 | } stack_start; | ||
47 | 44 | ||
48 | struct smp_ops { | 45 | struct smp_ops { |
49 | void (*smp_prepare_boot_cpu)(void); | 46 | void (*smp_prepare_boot_cpu)(void); |
diff --git a/arch/x86/include/asm/system_64.h b/arch/x86/include/asm/system_64.h
deleted file mode 100644
index 1159e091ad0..00000000000
--- a/arch/x86/include/asm/system_64.h
+++ /dev/null
@@ -1,22 +0,0 @@ | |||
1 | #ifndef _ASM_X86_SYSTEM_64_H | ||
2 | #define _ASM_X86_SYSTEM_64_H | ||
3 | |||
4 | #include <asm/segment.h> | ||
5 | #include <asm/cmpxchg.h> | ||
6 | |||
7 | |||
8 | static inline unsigned long read_cr8(void) | ||
9 | { | ||
10 | unsigned long cr8; | ||
11 | asm volatile("movq %%cr8,%0" : "=r" (cr8)); | ||
12 | return cr8; | ||
13 | } | ||
14 | |||
15 | static inline void write_cr8(unsigned long val) | ||
16 | { | ||
17 | asm volatile("movq %0,%%cr8" :: "r" (val) : "memory"); | ||
18 | } | ||
19 | |||
20 | #include <linux/irqflags.h> | ||
21 | |||
22 | #endif /* _ASM_X86_SYSTEM_64_H */ | ||
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index 8760cc60a21..f25bdf238a3 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -42,6 +42,11 @@ extern unsigned int machine_to_phys_order; | |||
42 | extern unsigned long get_phys_to_machine(unsigned long pfn); | 42 | extern unsigned long get_phys_to_machine(unsigned long pfn); |
43 | extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn); | 43 | extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn); |
44 | 44 | ||
45 | extern int m2p_add_override(unsigned long mfn, struct page *page); | ||
46 | extern int m2p_remove_override(struct page *page); | ||
47 | extern struct page *m2p_find_override(unsigned long mfn); | ||
48 | extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn); | ||
49 | |||
45 | static inline unsigned long pfn_to_mfn(unsigned long pfn) | 50 | static inline unsigned long pfn_to_mfn(unsigned long pfn) |
46 | { | 51 | { |
47 | unsigned long mfn; | 52 | unsigned long mfn; |
@@ -72,9 +77,6 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn) | |||
72 | if (xen_feature(XENFEAT_auto_translated_physmap)) | 77 | if (xen_feature(XENFEAT_auto_translated_physmap)) |
73 | return mfn; | 78 | return mfn; |
74 | 79 | ||
75 | if (unlikely((mfn >> machine_to_phys_order) != 0)) | ||
76 | return ~0; | ||
77 | |||
78 | pfn = 0; | 80 | pfn = 0; |
79 | /* | 81 | /* |
80 | * The array access can fail (e.g., device space beyond end of RAM). | 82 | * The array access can fail (e.g., device space beyond end of RAM). |
@@ -83,6 +85,14 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn) | |||
83 | */ | 85 | */ |
84 | __get_user(pfn, &machine_to_phys_mapping[mfn]); | 86 | __get_user(pfn, &machine_to_phys_mapping[mfn]); |
85 | 87 | ||
88 | /* | ||
89 | * If this appears to be a foreign mfn (because the pfn | ||
90 | * doesn't map back to the mfn), then check the local override | ||
91 | * table to see if there's a better pfn to use. | ||
92 | */ | ||
93 | if (get_phys_to_machine(pfn) != mfn) | ||
94 | pfn = m2p_find_override_pfn(mfn, pfn); | ||
95 | |||
86 | return pfn; | 96 | return pfn; |
87 | } | 97 | } |
88 | 98 | ||
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index ec881c6bfee..b3a71137983 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -509,6 +509,7 @@ int acpi_gsi_to_irq(u32 gsi, unsigned int *irq) | |||
509 | 509 | ||
510 | return 0; | 510 | return 0; |
511 | } | 511 | } |
512 | EXPORT_SYMBOL_GPL(acpi_gsi_to_irq); | ||
512 | 513 | ||
513 | int acpi_isa_irq_to_gsi(unsigned isa_irq, u32 *gsi) | 514 | int acpi_isa_irq_to_gsi(unsigned isa_irq, u32 *gsi) |
514 | { | 515 | { |
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index 69fd72aa559..68d1537b8c8 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -12,10 +12,8 @@ | |||
12 | #include <linux/cpumask.h> | 12 | #include <linux/cpumask.h> |
13 | #include <asm/segment.h> | 13 | #include <asm/segment.h> |
14 | #include <asm/desc.h> | 14 | #include <asm/desc.h> |
15 | |||
16 | #ifdef CONFIG_X86_32 | ||
17 | #include <asm/pgtable.h> | 15 | #include <asm/pgtable.h> |
18 | #endif | 16 | #include <asm/cacheflush.h> |
19 | 17 | ||
20 | #include "realmode/wakeup.h" | 18 | #include "realmode/wakeup.h" |
21 | #include "sleep.h" | 19 | #include "sleep.h" |
@@ -100,7 +98,7 @@ int acpi_save_state_mem(void) | |||
100 | #else /* CONFIG_64BIT */ | 98 | #else /* CONFIG_64BIT */ |
101 | header->trampoline_segment = setup_trampoline() >> 4; | 99 | header->trampoline_segment = setup_trampoline() >> 4; |
102 | #ifdef CONFIG_SMP | 100 | #ifdef CONFIG_SMP |
103 | stack_start.sp = temp_stack + sizeof(temp_stack); | 101 | stack_start = (unsigned long)temp_stack + sizeof(temp_stack); |
104 | early_gdt_descr.address = | 102 | early_gdt_descr.address = |
105 | (unsigned long)get_cpu_gdt_table(smp_processor_id()); | 103 | (unsigned long)get_cpu_gdt_table(smp_processor_id()); |
106 | initial_gs = per_cpu_offset(smp_processor_id()); | 104 | initial_gs = per_cpu_offset(smp_processor_id()); |
@@ -149,6 +147,15 @@ void __init acpi_reserve_wakeup_memory(void) | |||
149 | memblock_x86_reserve_range(mem, mem + WAKEUP_SIZE, "ACPI WAKEUP"); | 147 | memblock_x86_reserve_range(mem, mem + WAKEUP_SIZE, "ACPI WAKEUP"); |
150 | } | 148 | } |
151 | 149 | ||
150 | int __init acpi_configure_wakeup_memory(void) | ||
151 | { | ||
152 | if (acpi_realmode) | ||
153 | set_memory_x(acpi_realmode, WAKEUP_SIZE >> PAGE_SHIFT); | ||
154 | |||
155 | return 0; | ||
156 | } | ||
157 | arch_initcall(acpi_configure_wakeup_memory); | ||
158 | |||
152 | 159 | ||
153 | static int __init acpi_sleep_setup(char *str) | 160 | static int __init acpi_sleep_setup(char *str) |
154 | { | 161 | { |
diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c
index 7c9ab59653e..51ef31a89be 100644
--- a/arch/x86/kernel/apb_timer.c
+++ b/arch/x86/kernel/apb_timer.c
@@ -313,14 +313,16 @@ static void apbt_setup_irq(struct apbt_dev *adev) | |||
313 | if (adev->irq == 0) | 313 | if (adev->irq == 0) |
314 | return; | 314 | return; |
315 | 315 | ||
316 | irq_modify_status(adev->irq, 0, IRQ_MOVE_PCNTXT); | ||
317 | irq_set_affinity(adev->irq, cpumask_of(adev->cpu)); | ||
318 | /* APB timer irqs are set up as mp_irqs, timer is edge type */ | ||
319 | __set_irq_handler(adev->irq, handle_edge_irq, 0, "edge"); | ||
320 | |||
316 | if (system_state == SYSTEM_BOOTING) { | 321 | if (system_state == SYSTEM_BOOTING) { |
317 | irq_modify_status(adev->irq, 0, IRQ_MOVE_PCNTXT); | ||
318 | irq_set_affinity(adev->irq, cpumask_of(adev->cpu)); | ||
319 | /* APB timer irqs are set up as mp_irqs, timer is edge type */ | ||
320 | __set_irq_handler(adev->irq, handle_edge_irq, 0, "edge"); | ||
321 | if (request_irq(adev->irq, apbt_interrupt_handler, | 322 | if (request_irq(adev->irq, apbt_interrupt_handler, |
322 | IRQF_TIMER | IRQF_DISABLED | IRQF_NOBALANCING, | 323 | IRQF_TIMER | IRQF_DISABLED | |
323 | adev->name, adev)) { | 324 | IRQF_NOBALANCING, |
325 | adev->name, adev)) { | ||
324 | printk(KERN_ERR "Failed request IRQ for APBT%d\n", | 326 | printk(KERN_ERR "Failed request IRQ for APBT%d\n", |
325 | adev->num); | 327 | adev->num); |
326 | } | 328 | } |
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 7283e98deaa..ec2c19a7b8e 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -45,6 +45,7 @@ static const struct _cache_table __cpuinitconst cache_table[] = | |||
45 | { 0x0a, LVL_1_DATA, 8 }, /* 2 way set assoc, 32 byte line size */ | 45 | { 0x0a, LVL_1_DATA, 8 }, /* 2 way set assoc, 32 byte line size */ |
46 | { 0x0c, LVL_1_DATA, 16 }, /* 4-way set assoc, 32 byte line size */ | 46 | { 0x0c, LVL_1_DATA, 16 }, /* 4-way set assoc, 32 byte line size */ |
47 | { 0x0d, LVL_1_DATA, 16 }, /* 4-way set assoc, 64 byte line size */ | 47 | { 0x0d, LVL_1_DATA, 16 }, /* 4-way set assoc, 64 byte line size */ |
48 | { 0x0e, LVL_1_DATA, 24 }, /* 6-way set assoc, 64 byte line size */ | ||
48 | { 0x21, LVL_2, 256 }, /* 8-way set assoc, 64 byte line size */ | 49 | { 0x21, LVL_2, 256 }, /* 8-way set assoc, 64 byte line size */ |
49 | { 0x22, LVL_3, 512 }, /* 4-way set assoc, sectored cache, 64 byte line size */ | 50 | { 0x22, LVL_3, 512 }, /* 4-way set assoc, sectored cache, 64 byte line size */ |
50 | { 0x23, LVL_3, MB(1) }, /* 8-way set assoc, sectored cache, 64 byte line size */ | 51 | { 0x23, LVL_3, MB(1) }, /* 8-way set assoc, sectored cache, 64 byte line size */ |
@@ -66,6 +67,7 @@ static const struct _cache_table __cpuinitconst cache_table[] = | |||
66 | { 0x45, LVL_2, MB(2) }, /* 4-way set assoc, 32 byte line size */ | 67 | { 0x45, LVL_2, MB(2) }, /* 4-way set assoc, 32 byte line size */ |
67 | { 0x46, LVL_3, MB(4) }, /* 4-way set assoc, 64 byte line size */ | 68 | { 0x46, LVL_3, MB(4) }, /* 4-way set assoc, 64 byte line size */ |
68 | { 0x47, LVL_3, MB(8) }, /* 8-way set assoc, 64 byte line size */ | 69 | { 0x47, LVL_3, MB(8) }, /* 8-way set assoc, 64 byte line size */ |
70 | { 0x48, LVL_2, MB(3) }, /* 12-way set assoc, 64 byte line size */ | ||
69 | { 0x49, LVL_3, MB(4) }, /* 16-way set assoc, 64 byte line size */ | 71 | { 0x49, LVL_3, MB(4) }, /* 16-way set assoc, 64 byte line size */ |
70 | { 0x4a, LVL_3, MB(6) }, /* 12-way set assoc, 64 byte line size */ | 72 | { 0x4a, LVL_3, MB(6) }, /* 12-way set assoc, 64 byte line size */ |
71 | { 0x4b, LVL_3, MB(8) }, /* 16-way set assoc, 64 byte line size */ | 73 | { 0x4b, LVL_3, MB(8) }, /* 16-way set assoc, 64 byte line size */ |
@@ -87,6 +89,7 @@ static const struct _cache_table __cpuinitconst cache_table[] = | |||
87 | { 0x7c, LVL_2, MB(1) }, /* 8-way set assoc, sectored cache, 64 byte line size */ | 89 | { 0x7c, LVL_2, MB(1) }, /* 8-way set assoc, sectored cache, 64 byte line size */ |
88 | { 0x7d, LVL_2, MB(2) }, /* 8-way set assoc, 64 byte line size */ | 90 | { 0x7d, LVL_2, MB(2) }, /* 8-way set assoc, 64 byte line size */ |
89 | { 0x7f, LVL_2, 512 }, /* 2-way set assoc, 64 byte line size */ | 91 | { 0x7f, LVL_2, 512 }, /* 2-way set assoc, 64 byte line size */ |
92 | { 0x80, LVL_2, 512 }, /* 8-way set assoc, 64 byte line size */ | ||
90 | { 0x82, LVL_2, 256 }, /* 8-way set assoc, 32 byte line size */ | 93 | { 0x82, LVL_2, 256 }, /* 8-way set assoc, 32 byte line size */ |
91 | { 0x83, LVL_2, 512 }, /* 8-way set assoc, 32 byte line size */ | 94 | { 0x83, LVL_2, 512 }, /* 8-way set assoc, 32 byte line size */ |
92 | { 0x84, LVL_2, MB(1) }, /* 8-way set assoc, 32 byte line size */ | 95 | { 0x84, LVL_2, MB(1) }, /* 8-way set assoc, 32 byte line size */ |
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index e12246ff5aa..6f8c5e9da97 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -59,6 +59,7 @@ struct thermal_state { | |||
59 | 59 | ||
60 | /* Callback to handle core threshold interrupts */ | 60 | /* Callback to handle core threshold interrupts */ |
61 | int (*platform_thermal_notify)(__u64 msr_val); | 61 | int (*platform_thermal_notify)(__u64 msr_val); |
62 | EXPORT_SYMBOL(platform_thermal_notify); | ||
62 | 63 | ||
63 | static DEFINE_PER_CPU(struct thermal_state, thermal_state); | 64 | static DEFINE_PER_CPU(struct thermal_state, thermal_state); |
64 | 65 | ||
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 01c0f3ee6cc..bebabec5b44 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -793,13 +793,21 @@ void set_mtrr_aps_delayed_init(void) | |||
793 | } | 793 | } |
794 | 794 | ||
795 | /* | 795 | /* |
796 | * MTRR initialization for all AP's | 796 | * Delayed MTRR initialization for all AP's |
797 | */ | 797 | */ |
798 | void mtrr_aps_init(void) | 798 | void mtrr_aps_init(void) |
799 | { | 799 | { |
800 | if (!use_intel()) | 800 | if (!use_intel()) |
801 | return; | 801 | return; |
802 | 802 | ||
803 | /* | ||
804 | * Check if someone has requested the delay of AP MTRR initialization, | ||
805 | * by doing set_mtrr_aps_delayed_init(), prior to this point. If not, | ||
806 | * then we are done. | ||
807 | */ | ||
808 | if (!mtrr_aps_delayed_init) | ||
809 | return; | ||
810 | |||
803 | set_mtrr(~0U, 0, 0, 0); | 811 | set_mtrr(~0U, 0, 0, 0); |
804 | mtrr_aps_delayed_init = false; | 812 | mtrr_aps_delayed_init = false; |
805 | } | 813 | } |
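Editor's note: the new early return makes mtrr_aps_init() a no-op unless set_mtrr_aps_delayed_init() was called beforehand, exactly as the added comment describes. A small userspace-style sketch of that request/consume flag pattern, with illustrative names (not the kernel code itself):

    #include <stdbool.h>
    #include <stdio.h>

    static bool aps_delayed_init;   /* plays the role of mtrr_aps_delayed_init */

    static void set_aps_delayed_init(void)
    {
            aps_delayed_init = true;
    }

    static void aps_init(void)
    {
            /* Nothing was deferred, so there is nothing to do. */
            if (!aps_delayed_init)
                    return;

            printf("running deferred per-CPU setup\n");
            aps_delayed_init = false;   /* consume the request */
    }

    int main(void)
    {
            aps_init();                 /* returns immediately */
            set_aps_delayed_init();
            aps_init();                 /* now does the work */
            return 0;
    }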
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c index e56b9bfbabd..f7a0993c1e7 100644 --- a/arch/x86/kernel/cpu/perf_event_p4.c +++ b/arch/x86/kernel/cpu/perf_event_p4.c | |||
@@ -682,7 +682,7 @@ static int p4_validate_raw_event(struct perf_event *event) | |||
682 | * if an event is shared accross the logical threads | 682 | * if an event is shared accross the logical threads |
682 | * if an event is shared across the logical threads | 682 | * if an event is shared across the logical threads |
683 | * the user needs special permissions to be able to use it | 683 | * the user needs special permissions to be able to use it |
684 | */ | 684 | */ |
685 | if (p4_event_bind_map[v].shared) { | 685 | if (p4_ht_active() && p4_event_bind_map[v].shared) { |
686 | if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN)) | 686 | if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN)) |
687 | return -EACCES; | 687 | return -EACCES; |
688 | } | 688 | } |
@@ -727,7 +727,8 @@ static int p4_hw_config(struct perf_event *event) | |||
727 | event->hw.config = p4_set_ht_bit(event->hw.config); | 727 | event->hw.config = p4_set_ht_bit(event->hw.config); |
728 | 728 | ||
729 | if (event->attr.type == PERF_TYPE_RAW) { | 729 | if (event->attr.type == PERF_TYPE_RAW) { |
730 | 730 | struct p4_event_bind *bind; | |
731 | unsigned int esel; | ||
731 | /* | 732 | /* |
732 | * Clear bits we reserve to be managed by kernel itself | 733 | * Clear bits we reserve to be managed by kernel itself |
733 | * and never allowed from a user space | 734 | * and never allowed from a user space |
@@ -743,6 +744,13 @@ static int p4_hw_config(struct perf_event *event) | |||
743 | * bits since we keep additional info here (for cache events and etc) | 744 | * bits since we keep additional info here (for cache events and etc) |
744 | */ | 745 | */ |
745 | event->hw.config |= event->attr.config; | 746 | event->hw.config |= event->attr.config; |
747 | bind = p4_config_get_bind(event->attr.config); | ||
748 | if (!bind) { | ||
749 | rc = -EINVAL; | ||
750 | goto out; | ||
751 | } | ||
752 | esel = P4_OPCODE_ESEL(bind->opcode); | ||
753 | event->hw.config |= p4_config_pack_cccr(P4_CCCR_ESEL(esel)); | ||
746 | } | 754 | } |
747 | 755 | ||
748 | rc = x86_setup_perfctr(event); | 756 | rc = x86_setup_perfctr(event); |
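Editor's note: for PERF_TYPE_RAW events the hunk now looks up the bound event descriptor, rejects configs with no binding (-EINVAL), and packs the ESCR select derived from the opcode into the CCCR portion of the config. A standalone sketch of that derive-and-pack step; the bit positions below are invented for illustration and do not match the real perf_event_p4.h macros:

    #include <stdint.h>
    #include <stdio.h>

    /* Assumed layouts, for illustration only. */
    #define ESEL_FROM_OPCODE(op)  ((op) & 0x1f)
    #define CCCR_PACK_ESEL(esel)  ((uint64_t)(esel) << 13)

    /* Merge a user-supplied raw config with the select field implied by
     * the event's opcode, as the hunk does for raw events. */
    static uint64_t pack_raw_config(uint64_t user_config, uint32_t opcode)
    {
            unsigned int esel = ESEL_FROM_OPCODE(opcode);

            return user_config | CCCR_PACK_ESEL(esel);
    }

    int main(void)
    {
            printf("0x%llx\n",
                   (unsigned long long)pack_raw_config(0x0003b000ULL, 0x3c));
            return 0;
    }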
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index d6fb146c0d8..df20723a6a1 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c | |||
@@ -234,6 +234,7 @@ unsigned __kprobes long oops_begin(void) | |||
234 | bust_spinlocks(1); | 234 | bust_spinlocks(1); |
235 | return flags; | 235 | return flags; |
236 | } | 236 | } |
237 | EXPORT_SYMBOL_GPL(oops_begin); | ||
237 | 238 | ||
238 | void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr) | 239 | void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr) |
239 | { | 240 | { |
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 64101335de1..a6b6fcf7f0a 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c | |||
@@ -149,13 +149,13 @@ void dump_trace(struct task_struct *task, | |||
149 | unsigned used = 0; | 149 | unsigned used = 0; |
150 | struct thread_info *tinfo; | 150 | struct thread_info *tinfo; |
151 | int graph = 0; | 151 | int graph = 0; |
152 | unsigned long dummy; | ||
152 | unsigned long bp; | 153 | unsigned long bp; |
153 | 154 | ||
154 | if (!task) | 155 | if (!task) |
155 | task = current; | 156 | task = current; |
156 | 157 | ||
157 | if (!stack) { | 158 | if (!stack) { |
158 | unsigned long dummy; | ||
159 | stack = &dummy; | 159 | stack = &dummy; |
160 | if (task && task != current) | 160 | if (task && task != current) |
161 | stack = (unsigned long *)task->thread.sp; | 161 | stack = (unsigned long *)task->thread.sp; |
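Editor's note: hoisting dummy to function scope matters because the old code took its address inside the if block and kept using that pointer after the block had ended. A small userspace illustration of the lifetime issue the hunk fixes (names are illustrative):

    #include <stdio.h>

    static void before(void)
    {
            unsigned long *stack = NULL;

            if (!stack) {
                    unsigned long dummy;    /* lifetime ends at the brace */
                    stack = &dummy;
            }
            /* 'stack' now dangles; any later use is undefined behaviour. */
            printf("%p\n", (void *)stack);
    }

    static void after(void)
    {
            unsigned long dummy;            /* lives for the whole function */
            unsigned long *stack = NULL;

            if (!stack)
                    stack = &dummy;
            printf("%p\n", (void *)stack);  /* pointer stays valid */
    }

    int main(void)
    {
            before();
            after();
            return 0;
    }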
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 0c2b7ef7a34..294f26da0c0 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c | |||
@@ -14,6 +14,7 @@ | |||
14 | #include <linux/bootmem.h> | 14 | #include <linux/bootmem.h> |
15 | #include <linux/pfn.h> | 15 | #include <linux/pfn.h> |
16 | #include <linux/suspend.h> | 16 | #include <linux/suspend.h> |
17 | #include <linux/acpi.h> | ||
17 | #include <linux/firmware-map.h> | 18 | #include <linux/firmware-map.h> |
18 | #include <linux/memblock.h> | 19 | #include <linux/memblock.h> |
19 | 20 | ||
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index fc293dc8dc3..767d6c43de3 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S | |||
@@ -85,6 +85,8 @@ RESERVE_BRK(pagetables, INIT_MAP_SIZE) | |||
85 | */ | 85 | */ |
86 | __HEAD | 86 | __HEAD |
87 | ENTRY(startup_32) | 87 | ENTRY(startup_32) |
88 | movl pa(stack_start),%ecx | ||
89 | |||
88 | /* test KEEP_SEGMENTS flag to see if the bootloader is asking | 90 | /* test KEEP_SEGMENTS flag to see if the bootloader is asking |
89 | us to not reload segments */ | 91 | us to not reload segments */ |
90 | testb $(1<<6), BP_loadflags(%esi) | 92 | testb $(1<<6), BP_loadflags(%esi) |
@@ -99,7 +101,9 @@ ENTRY(startup_32) | |||
99 | movl %eax,%es | 101 | movl %eax,%es |
100 | movl %eax,%fs | 102 | movl %eax,%fs |
101 | movl %eax,%gs | 103 | movl %eax,%gs |
104 | movl %eax,%ss | ||
102 | 2: | 105 | 2: |
106 | leal -__PAGE_OFFSET(%ecx),%esp | ||
103 | 107 | ||
104 | /* | 108 | /* |
105 | * Clear BSS first so that there are no surprises... | 109 | * Clear BSS first so that there are no surprises... |
@@ -145,8 +149,6 @@ ENTRY(startup_32) | |||
145 | * _brk_end is set up to point to the first "safe" location. | 149 | * _brk_end is set up to point to the first "safe" location. |
146 | * Mappings are created both at virtual address 0 (identity mapping) | 150 | * Mappings are created both at virtual address 0 (identity mapping) |
147 | * and PAGE_OFFSET for up to _end. | 151 | * and PAGE_OFFSET for up to _end. |
148 | * | ||
149 | * Note that the stack is not yet set up! | ||
150 | */ | 152 | */ |
151 | #ifdef CONFIG_X86_PAE | 153 | #ifdef CONFIG_X86_PAE |
152 | 154 | ||
@@ -282,6 +284,9 @@ ENTRY(startup_32_smp) | |||
282 | movl %eax,%es | 284 | movl %eax,%es |
283 | movl %eax,%fs | 285 | movl %eax,%fs |
284 | movl %eax,%gs | 286 | movl %eax,%gs |
287 | movl pa(stack_start),%ecx | ||
288 | movl %eax,%ss | ||
289 | leal -__PAGE_OFFSET(%ecx),%esp | ||
285 | #endif /* CONFIG_SMP */ | 290 | #endif /* CONFIG_SMP */ |
286 | default_entry: | 291 | default_entry: |
287 | 292 | ||
@@ -347,8 +352,8 @@ default_entry: | |||
347 | movl %eax,%cr0 /* ..and set paging (PG) bit */ | 352 | movl %eax,%cr0 /* ..and set paging (PG) bit */ |
348 | ljmp $__BOOT_CS,$1f /* Clear prefetch and normalize %eip */ | 353 | ljmp $__BOOT_CS,$1f /* Clear prefetch and normalize %eip */ |
349 | 1: | 354 | 1: |
350 | /* Set up the stack pointer */ | 355 | /* Shift the stack pointer to a virtual address */ |
351 | lss stack_start,%esp | 356 | addl $__PAGE_OFFSET, %esp |
352 | 357 | ||
353 | /* | 358 | /* |
354 | * Initialize eflags. Some BIOS's leave bits like NT set. This would | 359 | * Initialize eflags. Some BIOS's leave bits like NT set. This would |
@@ -360,9 +365,7 @@ default_entry: | |||
360 | 365 | ||
361 | #ifdef CONFIG_SMP | 366 | #ifdef CONFIG_SMP |
362 | cmpb $0, ready | 367 | cmpb $0, ready |
363 | jz 1f /* Initial CPU cleans BSS */ | 368 | jnz checkCPUtype |
364 | jmp checkCPUtype | ||
365 | 1: | ||
366 | #endif /* CONFIG_SMP */ | 369 | #endif /* CONFIG_SMP */ |
367 | 370 | ||
368 | /* | 371 | /* |
@@ -470,14 +473,7 @@ is386: movl $2,%ecx # set MP | |||
470 | 473 | ||
471 | cld # gcc2 wants the direction flag cleared at all times | 474 | cld # gcc2 wants the direction flag cleared at all times |
472 | pushl $0 # fake return address for unwinder | 475 | pushl $0 # fake return address for unwinder |
473 | #ifdef CONFIG_SMP | ||
474 | movb ready, %cl | ||
475 | movb $1, ready | 476 | movb $1, ready |
476 | cmpb $0,%cl # the first CPU calls start_kernel | ||
477 | je 1f | ||
478 | movl (stack_start), %esp | ||
479 | 1: | ||
480 | #endif /* CONFIG_SMP */ | ||
481 | jmp *(initial_code) | 477 | jmp *(initial_code) |
482 | 478 | ||
483 | /* | 479 | /* |
@@ -670,15 +666,15 @@ ENTRY(initial_page_table) | |||
670 | #endif | 666 | #endif |
671 | 667 | ||
672 | .data | 668 | .data |
669 | .balign 4 | ||
673 | ENTRY(stack_start) | 670 | ENTRY(stack_start) |
674 | .long init_thread_union+THREAD_SIZE | 671 | .long init_thread_union+THREAD_SIZE |
675 | .long __BOOT_DS | ||
676 | |||
677 | ready: .byte 0 | ||
678 | 672 | ||
679 | early_recursion_flag: | 673 | early_recursion_flag: |
680 | .long 0 | 674 | .long 0 |
681 | 675 | ||
676 | ready: .byte 0 | ||
677 | |||
682 | int_msg: | 678 | int_msg: |
683 | .asciz "Unknown interrupt or fault at: %p %p %p\n" | 679 | .asciz "Unknown interrupt or fault at: %p %p %p\n" |
684 | 680 | ||
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 48ff6dcffa0..9974d21048f 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c | |||
@@ -129,8 +129,7 @@ void __cpuinit irq_ctx_init(int cpu) | |||
129 | irqctx = page_address(alloc_pages_node(cpu_to_node(cpu), | 129 | irqctx = page_address(alloc_pages_node(cpu_to_node(cpu), |
130 | THREAD_FLAGS, | 130 | THREAD_FLAGS, |
131 | THREAD_ORDER)); | 131 | THREAD_ORDER)); |
132 | irqctx->tinfo.task = NULL; | 132 | memset(&irqctx->tinfo, 0, sizeof(struct thread_info)); |
133 | irqctx->tinfo.exec_domain = NULL; | ||
134 | irqctx->tinfo.cpu = cpu; | 133 | irqctx->tinfo.cpu = cpu; |
135 | irqctx->tinfo.preempt_count = HARDIRQ_OFFSET; | 134 | irqctx->tinfo.preempt_count = HARDIRQ_OFFSET; |
136 | irqctx->tinfo.addr_limit = MAKE_MM_SEG(0); | 135 | irqctx->tinfo.addr_limit = MAKE_MM_SEG(0); |
@@ -140,10 +139,8 @@ void __cpuinit irq_ctx_init(int cpu) | |||
140 | irqctx = page_address(alloc_pages_node(cpu_to_node(cpu), | 139 | irqctx = page_address(alloc_pages_node(cpu_to_node(cpu), |
141 | THREAD_FLAGS, | 140 | THREAD_FLAGS, |
142 | THREAD_ORDER)); | 141 | THREAD_ORDER)); |
143 | irqctx->tinfo.task = NULL; | 142 | memset(&irqctx->tinfo, 0, sizeof(struct thread_info)); |
144 | irqctx->tinfo.exec_domain = NULL; | ||
145 | irqctx->tinfo.cpu = cpu; | 143 | irqctx->tinfo.cpu = cpu; |
146 | irqctx->tinfo.preempt_count = 0; | ||
147 | irqctx->tinfo.addr_limit = MAKE_MM_SEG(0); | 144 | irqctx->tinfo.addr_limit = MAKE_MM_SEG(0); |
148 | 145 | ||
149 | per_cpu(softirq_ctx, cpu) = irqctx; | 146 | per_cpu(softirq_ctx, cpu) = irqctx; |
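Editor's note: replacing the field-by-field assignments with memset() zeroes every member of the freshly allocated thread_info up front, so only the fields that must differ (cpu, preempt_count, addr_limit) are set afterwards, and the explicit preempt_count = 0 on the softirq context becomes unnecessary. A generic sketch of the pattern, using a simplified stand-in struct (HARDIRQ_OFFSET value shown is illustrative):

    #include <stdio.h>
    #include <string.h>

    /* Simplified stand-in for struct thread_info. */
    struct ctx_info {
            void *task;
            void *exec_domain;
            int   cpu;
            int   preempt_count;
    };

    static void init_ctx(struct ctx_info *ti, int cpu, int preempt_count)
    {
            memset(ti, 0, sizeof(*ti));     /* all fields start at 0/NULL */
            ti->cpu = cpu;                  /* then set only what differs  */
            ti->preempt_count = preempt_count;
    }

    int main(void)
    {
            struct ctx_info hardirq, softirq;

            init_ctx(&hardirq, 0, 1 << 16); /* illustrative HARDIRQ_OFFSET */
            init_ctx(&softirq, 0, 0);
            printf("%d %d\n", hardirq.preempt_count, softirq.preempt_count);
            return 0;
    }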
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index 8f295609173..ab23f1ad4bf 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c | |||
@@ -37,20 +37,11 @@ | |||
37 | 37 | ||
38 | void *module_alloc(unsigned long size) | 38 | void *module_alloc(unsigned long size) |
39 | { | 39 | { |
40 | struct vm_struct *area; | 40 | if (PAGE_ALIGN(size) > MODULES_LEN) |
41 | |||
42 | if (!size) | ||
43 | return NULL; | ||
44 | size = PAGE_ALIGN(size); | ||
45 | if (size > MODULES_LEN) | ||
46 | return NULL; | 41 | return NULL; |
47 | 42 | return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, | |
48 | area = __get_vm_area(size, VM_ALLOC, MODULES_VADDR, MODULES_END); | 43 | GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL_EXEC, |
49 | if (!area) | 44 | -1, __builtin_return_address(0)); |
50 | return NULL; | ||
51 | |||
52 | return __vmalloc_area(area, GFP_KERNEL | __GFP_HIGHMEM, | ||
53 | PAGE_KERNEL_EXEC); | ||
54 | } | 45 | } |
55 | 46 | ||
56 | /* Free memory returned from module_alloc */ | 47 | /* Free memory returned from module_alloc */ |
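Editor's note: the rewritten module_alloc() keeps only the size bound check and lets __vmalloc_node_range() reserve and map the area in one call, instead of pairing __get_vm_area() with __vmalloc_area(). A rough userspace analogue of the new helper's shape; the region size and allocator are stand-ins, not the kernel API:

    #include <stdio.h>
    #include <stdlib.h>

    #define PAGE_SIZE     4096UL
    #define REGION_LEN    (64UL << 20)      /* stand-in for MODULES_LEN */
    #define PAGE_ALIGN(x) (((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

    static void *region_alloc(size_t size)
    {
            if (PAGE_ALIGN(size) > REGION_LEN)
                    return NULL;            /* too big for the window */
            /* The kernel does this with one __vmalloc_node_range() call. */
            return malloc(size);
    }

    int main(void)
    {
            void *p = region_alloc(8192);

            printf("%s\n", p ? "ok" : "failed");
            free(p);
            return 0;
    }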
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index c5b250011fd..869e1aeeb71 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c | |||
@@ -421,8 +421,11 @@ struct pv_mmu_ops pv_mmu_ops = { | |||
421 | .set_pte = native_set_pte, | 421 | .set_pte = native_set_pte, |
422 | .set_pte_at = native_set_pte_at, | 422 | .set_pte_at = native_set_pte_at, |
423 | .set_pmd = native_set_pmd, | 423 | .set_pmd = native_set_pmd, |
424 | .set_pmd_at = native_set_pmd_at, | ||
424 | .pte_update = paravirt_nop, | 425 | .pte_update = paravirt_nop, |
425 | .pte_update_defer = paravirt_nop, | 426 | .pte_update_defer = paravirt_nop, |
427 | .pmd_update = paravirt_nop, | ||
428 | .pmd_update_defer = paravirt_nop, | ||
426 | 429 | ||
427 | .ptep_modify_prot_start = __ptep_modify_prot_start, | 430 | .ptep_modify_prot_start = __ptep_modify_prot_start, |
428 | .ptep_modify_prot_commit = __ptep_modify_prot_commit, | 431 | .ptep_modify_prot_commit = __ptep_modify_prot_commit, |
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 09c08a1c706..e764fc05d70 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c | |||
@@ -14,6 +14,7 @@ | |||
14 | #include <linux/utsname.h> | 14 | #include <linux/utsname.h> |
15 | #include <trace/events/power.h> | 15 | #include <trace/events/power.h> |
16 | #include <linux/hw_breakpoint.h> | 16 | #include <linux/hw_breakpoint.h> |
17 | #include <asm/cpu.h> | ||
17 | #include <asm/system.h> | 18 | #include <asm/system.h> |
18 | #include <asm/apic.h> | 19 | #include <asm/apic.h> |
19 | #include <asm/syscalls.h> | 20 | #include <asm/syscalls.h> |
@@ -22,11 +23,6 @@ | |||
22 | #include <asm/i387.h> | 23 | #include <asm/i387.h> |
23 | #include <asm/debugreg.h> | 24 | #include <asm/debugreg.h> |
24 | 25 | ||
25 | unsigned long idle_halt; | ||
26 | EXPORT_SYMBOL(idle_halt); | ||
27 | unsigned long idle_nomwait; | ||
28 | EXPORT_SYMBOL(idle_nomwait); | ||
29 | |||
30 | struct kmem_cache *task_xstate_cachep; | 26 | struct kmem_cache *task_xstate_cachep; |
31 | EXPORT_SYMBOL_GPL(task_xstate_cachep); | 27 | EXPORT_SYMBOL_GPL(task_xstate_cachep); |
32 | 28 | ||
@@ -327,7 +323,7 @@ long sys_execve(const char __user *name, | |||
327 | /* | 323 | /* |
328 | * Idle related variables and functions | 324 | * Idle related variables and functions |
329 | */ | 325 | */ |
330 | unsigned long boot_option_idle_override = 0; | 326 | unsigned long boot_option_idle_override = IDLE_NO_OVERRIDE; |
331 | EXPORT_SYMBOL(boot_option_idle_override); | 327 | EXPORT_SYMBOL(boot_option_idle_override); |
332 | 328 | ||
333 | /* | 329 | /* |
@@ -386,6 +382,8 @@ void default_idle(void) | |||
386 | else | 382 | else |
387 | local_irq_enable(); | 383 | local_irq_enable(); |
388 | current_thread_info()->status |= TS_POLLING; | 384 | current_thread_info()->status |= TS_POLLING; |
385 | trace_power_end(smp_processor_id()); | ||
386 | trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id()); | ||
389 | } else { | 387 | } else { |
390 | local_irq_enable(); | 388 | local_irq_enable(); |
391 | /* loop is done by the caller */ | 389 | /* loop is done by the caller */ |
@@ -443,8 +441,6 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait); | |||
443 | */ | 441 | */ |
444 | void mwait_idle_with_hints(unsigned long ax, unsigned long cx) | 442 | void mwait_idle_with_hints(unsigned long ax, unsigned long cx) |
445 | { | 443 | { |
446 | trace_power_start(POWER_CSTATE, (ax>>4)+1, smp_processor_id()); | ||
447 | trace_cpu_idle((ax>>4)+1, smp_processor_id()); | ||
448 | if (!need_resched()) { | 444 | if (!need_resched()) { |
449 | if (cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_CLFLUSH_MONITOR)) | 445 | if (cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_CLFLUSH_MONITOR)) |
450 | clflush((void *)¤t_thread_info()->flags); | 446 | clflush((void *)¤t_thread_info()->flags); |
@@ -471,6 +467,8 @@ static void mwait_idle(void) | |||
471 | __sti_mwait(0, 0); | 467 | __sti_mwait(0, 0); |
472 | else | 468 | else |
473 | local_irq_enable(); | 469 | local_irq_enable(); |
470 | trace_power_end(smp_processor_id()); | ||
471 | trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id()); | ||
474 | } else | 472 | } else |
475 | local_irq_enable(); | 473 | local_irq_enable(); |
476 | } | 474 | } |
@@ -503,17 +501,16 @@ static void poll_idle(void) | |||
503 | * | 501 | * |
504 | * idle=mwait overrides this decision and forces the usage of mwait. | 502 | * idle=mwait overrides this decision and forces the usage of mwait. |
505 | */ | 503 | */ |
506 | static int __cpuinitdata force_mwait; | ||
507 | 504 | ||
508 | #define MWAIT_INFO 0x05 | 505 | #define MWAIT_INFO 0x05 |
509 | #define MWAIT_ECX_EXTENDED_INFO 0x01 | 506 | #define MWAIT_ECX_EXTENDED_INFO 0x01 |
510 | #define MWAIT_EDX_C1 0xf0 | 507 | #define MWAIT_EDX_C1 0xf0 |
511 | 508 | ||
512 | static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c) | 509 | int __cpuinit mwait_usable(const struct cpuinfo_x86 *c) |
513 | { | 510 | { |
514 | u32 eax, ebx, ecx, edx; | 511 | u32 eax, ebx, ecx, edx; |
515 | 512 | ||
516 | if (force_mwait) | 513 | if (boot_option_idle_override == IDLE_FORCE_MWAIT) |
517 | return 1; | 514 | return 1; |
518 | 515 | ||
519 | if (c->cpuid_level < MWAIT_INFO) | 516 | if (c->cpuid_level < MWAIT_INFO) |
@@ -633,9 +630,10 @@ static int __init idle_setup(char *str) | |||
633 | if (!strcmp(str, "poll")) { | 630 | if (!strcmp(str, "poll")) { |
634 | printk("using polling idle threads.\n"); | 631 | printk("using polling idle threads.\n"); |
635 | pm_idle = poll_idle; | 632 | pm_idle = poll_idle; |
636 | } else if (!strcmp(str, "mwait")) | 633 | boot_option_idle_override = IDLE_POLL; |
637 | force_mwait = 1; | 634 | } else if (!strcmp(str, "mwait")) { |
638 | else if (!strcmp(str, "halt")) { | 635 | boot_option_idle_override = IDLE_FORCE_MWAIT; |
636 | } else if (!strcmp(str, "halt")) { | ||
639 | /* | 637 | /* |
640 | * When the boot option of idle=halt is added, halt is | 638 | * When the boot option of idle=halt is added, halt is |
641 | * forced to be used for CPU idle. In such case CPU C2/C3 | 639 | * forced to be used for CPU idle. In such case CPU C2/C3 |
@@ -644,8 +642,7 @@ static int __init idle_setup(char *str) | |||
644 | * the boot_option_idle_override. | 642 | * the boot_option_idle_override. |
645 | */ | 643 | */ |
646 | pm_idle = default_idle; | 644 | pm_idle = default_idle; |
647 | idle_halt = 1; | 645 | boot_option_idle_override = IDLE_HALT; |
648 | return 0; | ||
649 | } else if (!strcmp(str, "nomwait")) { | 646 | } else if (!strcmp(str, "nomwait")) { |
650 | /* | 647 | /* |
651 | * If the boot option of "idle=nomwait" is added, | 648 | * If the boot option of "idle=nomwait" is added, |
@@ -653,12 +650,10 @@ static int __init idle_setup(char *str) | |||
653 | * states. In such case it won't touch the variable | 650 | * states. In such case it won't touch the variable |
654 | * of boot_option_idle_override. | 651 | * of boot_option_idle_override. |
655 | */ | 652 | */ |
656 | idle_nomwait = 1; | 653 | boot_option_idle_override = IDLE_NOMWAIT; |
657 | return 0; | ||
658 | } else | 654 | } else |
659 | return -1; | 655 | return -1; |
660 | 656 | ||
661 | boot_option_idle_override = 1; | ||
662 | return 0; | 657 | return 0; |
663 | } | 658 | } |
664 | early_param("idle", idle_setup); | 659 | early_param("idle", idle_setup); |
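Editor's note: the three ad-hoc flags (idle_halt, idle_nomwait, force_mwait) collapse into the single boot_option_idle_override variable, which now holds one IDLE_* value. A compact userspace sketch of the resulting idle= parsing; the enum names mirror the hunk, the numeric values are illustrative:

    #include <stdio.h>
    #include <string.h>

    enum idle_boot_override {
            IDLE_NO_OVERRIDE = 0,
            IDLE_HALT,
            IDLE_POLL,
            IDLE_FORCE_MWAIT,
            IDLE_NOMWAIT,
    };

    static enum idle_boot_override parse_idle_option(const char *str)
    {
            if (!strcmp(str, "poll"))
                    return IDLE_POLL;
            if (!strcmp(str, "mwait"))
                    return IDLE_FORCE_MWAIT;
            if (!strcmp(str, "halt"))
                    return IDLE_HALT;
            if (!strcmp(str, "nomwait"))
                    return IDLE_NOMWAIT;
            return IDLE_NO_OVERRIDE;        /* unrecognized: no override */
    }

    int main(void)
    {
            printf("%d\n", parse_idle_option("nomwait"));
            return 0;
    }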
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 4b9befa0e34..8d128783af4 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c | |||
@@ -57,8 +57,6 @@ | |||
57 | #include <asm/syscalls.h> | 57 | #include <asm/syscalls.h> |
58 | #include <asm/debugreg.h> | 58 | #include <asm/debugreg.h> |
59 | 59 | ||
60 | #include <trace/events/power.h> | ||
61 | |||
62 | asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); | 60 | asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); |
63 | 61 | ||
64 | /* | 62 | /* |
@@ -113,8 +111,6 @@ void cpu_idle(void) | |||
113 | stop_critical_timings(); | 111 | stop_critical_timings(); |
114 | pm_idle(); | 112 | pm_idle(); |
115 | start_critical_timings(); | 113 | start_critical_timings(); |
116 | trace_power_end(smp_processor_id()); | ||
117 | trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id()); | ||
118 | } | 114 | } |
119 | tick_nohz_restart_sched_tick(); | 115 | tick_nohz_restart_sched_tick(); |
120 | preempt_enable_no_resched(); | 116 | preempt_enable_no_resched(); |
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 4c818a73839..bd387e8f73b 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c | |||
@@ -51,8 +51,6 @@ | |||
51 | #include <asm/syscalls.h> | 51 | #include <asm/syscalls.h> |
52 | #include <asm/debugreg.h> | 52 | #include <asm/debugreg.h> |
53 | 53 | ||
54 | #include <trace/events/power.h> | ||
55 | |||
56 | asmlinkage extern void ret_from_fork(void); | 54 | asmlinkage extern void ret_from_fork(void); |
57 | 55 | ||
58 | DEFINE_PER_CPU(unsigned long, old_rsp); | 56 | DEFINE_PER_CPU(unsigned long, old_rsp); |
@@ -141,10 +139,6 @@ void cpu_idle(void) | |||
141 | pm_idle(); | 139 | pm_idle(); |
142 | start_critical_timings(); | 140 | start_critical_timings(); |
143 | 141 | ||
144 | trace_power_end(smp_processor_id()); | ||
145 | trace_cpu_idle(PWR_EVENT_EXIT, | ||
146 | smp_processor_id()); | ||
147 | |||
148 | /* In many cases the interrupt that ended idle | 142 | /* In many cases the interrupt that ended idle |
149 | has already called exit_idle. But some idle | 143 | has already called exit_idle. But some idle |
150 | loops can be woken up without interrupt. */ | 144 | loops can be woken up without interrupt. */ |
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 763df77343d..03273b6c272 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c | |||
@@ -638,7 +638,7 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip) | |||
638 | * target processor state. | 638 | * target processor state. |
639 | */ | 639 | */ |
640 | startup_ipi_hook(phys_apicid, (unsigned long) start_secondary, | 640 | startup_ipi_hook(phys_apicid, (unsigned long) start_secondary, |
641 | (unsigned long)stack_start.sp); | 641 | stack_start); |
642 | 642 | ||
643 | /* | 643 | /* |
644 | * Run STARTUP IPI loop. | 644 | * Run STARTUP IPI loop. |
@@ -785,7 +785,7 @@ do_rest: | |||
785 | #endif | 785 | #endif |
786 | early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); | 786 | early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); |
787 | initial_code = (unsigned long)start_secondary; | 787 | initial_code = (unsigned long)start_secondary; |
788 | stack_start.sp = (void *) c_idle.idle->thread.sp; | 788 | stack_start = c_idle.idle->thread.sp; |
789 | 789 | ||
790 | /* start_ip had better be page-aligned! */ | 790 | /* start_ip had better be page-aligned! */ |
791 | start_ip = setup_trampoline(); | 791 | start_ip = setup_trampoline(); |
@@ -1402,8 +1402,9 @@ static inline void mwait_play_dead(void) | |||
1402 | unsigned int highest_subcstate = 0; | 1402 | unsigned int highest_subcstate = 0; |
1403 | int i; | 1403 | int i; |
1404 | void *mwait_ptr; | 1404 | void *mwait_ptr; |
1405 | struct cpuinfo_x86 *c = __this_cpu_ptr(&cpu_info); | ||
1405 | 1406 | ||
1406 | if (!cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_MWAIT)) | 1407 | if (!(cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c))) |
1407 | return; | 1408 | return; |
1408 | if (!cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_CLFLSH)) | 1409 | if (!cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_CLFLSH)) |
1409 | return; | 1410 | return; |
diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c index c2f1b26141e..998e972f3b1 100644 --- a/arch/x86/kernel/tboot.c +++ b/arch/x86/kernel/tboot.c | |||
@@ -133,7 +133,7 @@ static int map_tboot_page(unsigned long vaddr, unsigned long pfn, | |||
133 | pmd = pmd_alloc(&tboot_mm, pud, vaddr); | 133 | pmd = pmd_alloc(&tboot_mm, pud, vaddr); |
134 | if (!pmd) | 134 | if (!pmd) |
135 | return -1; | 135 | return -1; |
136 | pte = pte_alloc_map(&tboot_mm, pmd, vaddr); | 136 | pte = pte_alloc_map(&tboot_mm, NULL, pmd, vaddr); |
137 | if (!pte) | 137 | if (!pte) |
138 | return -1; | 138 | return -1; |
139 | set_pte_at(&tboot_mm, vaddr, pte, pfn_pte(pfn, prot)); | 139 | set_pte_at(&tboot_mm, vaddr, pte, pfn_pte(pfn, prot)); |
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 823f79a17ad..ffe5755caa8 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c | |||
@@ -464,7 +464,7 @@ unsigned long native_calibrate_tsc(void) | |||
464 | tsc_pit_min = min(tsc_pit_min, tsc_pit_khz); | 464 | tsc_pit_min = min(tsc_pit_min, tsc_pit_khz); |
465 | 465 | ||
466 | /* hpet or pmtimer available ? */ | 466 | /* hpet or pmtimer available ? */ |
467 | if (!hpet && !ref1 && !ref2) | 467 | if (ref1 == ref2) |
468 | continue; | 468 | continue; |
469 | 469 | ||
470 | /* Check, whether the sampling was disturbed by an SMI */ | 470 | /* Check, whether the sampling was disturbed by an SMI */ |
@@ -935,7 +935,7 @@ static void tsc_refine_calibration_work(struct work_struct *work) | |||
935 | tsc_stop = tsc_read_refs(&ref_stop, hpet); | 935 | tsc_stop = tsc_read_refs(&ref_stop, hpet); |
936 | 936 | ||
937 | /* hpet or pmtimer available ? */ | 937 | /* hpet or pmtimer available ? */ |
938 | if (!hpet && !ref_start && !ref_stop) | 938 | if (ref_start == ref_stop) |
939 | goto out; | 939 | goto out; |
940 | 940 | ||
941 | /* Check, whether the sampling was disturbed by an SMI */ | 941 | /* Check, whether the sampling was disturbed by an SMI */ |
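Editor's note: comparing the two reference reads directly covers both failure modes at once, no HPET/PM timer at all (both reads are 0) and a reference that did not advance between samples, either of which would otherwise risk a zero divisor in the frequency calculation that follows. A tiny illustration of the guard; the arithmetic below is a stand-in for the real calibration math:

    #include <stdio.h>

    static long ref_khz(long tsc_delta, unsigned long ref1, unsigned long ref2)
    {
            if (ref1 == ref2)               /* no usable reference interval */
                    return -1;
            return tsc_delta / (long)(ref2 - ref1);
    }

    int main(void)
    {
            /* No reference vs. a 100-tick reference interval. */
            printf("%ld %ld\n", ref_khz(1000000, 0, 0),
                                ref_khz(1000000, 100, 200));
            return 0;
    }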
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index 61fb9851962..863f8753ab0 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c | |||
@@ -179,6 +179,7 @@ static void mark_screen_rdonly(struct mm_struct *mm) | |||
179 | if (pud_none_or_clear_bad(pud)) | 179 | if (pud_none_or_clear_bad(pud)) |
180 | goto out; | 180 | goto out; |
181 | pmd = pmd_offset(pud, 0xA0000); | 181 | pmd = pmd_offset(pud, 0xA0000); |
182 | split_huge_page_pmd(mm, pmd); | ||
182 | if (pmd_none_or_clear_bad(pmd)) | 183 | if (pmd_none_or_clear_bad(pmd)) |
183 | goto out; | 184 | goto out; |
184 | pte = pte_offset_map_lock(mm, pmd, 0xA0000, &ptl); | 185 | pte = pte_offset_map_lock(mm, pmd, 0xA0000, &ptl); |
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 9cafbb49981..f02b8edc3d4 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
@@ -554,14 +554,18 @@ static int host_mapping_level(struct kvm *kvm, gfn_t gfn) | |||
554 | return ret; | 554 | return ret; |
555 | } | 555 | } |
556 | 556 | ||
557 | static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn) | 557 | static bool mapping_level_dirty_bitmap(struct kvm_vcpu *vcpu, gfn_t large_gfn) |
558 | { | 558 | { |
559 | struct kvm_memory_slot *slot; | 559 | struct kvm_memory_slot *slot; |
560 | int host_level, level, max_level; | ||
561 | |||
562 | slot = gfn_to_memslot(vcpu->kvm, large_gfn); | 560 | slot = gfn_to_memslot(vcpu->kvm, large_gfn); |
563 | if (slot && slot->dirty_bitmap) | 561 | if (slot && slot->dirty_bitmap) |
564 | return PT_PAGE_TABLE_LEVEL; | 562 | return true; |
563 | return false; | ||
564 | } | ||
565 | |||
566 | static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn) | ||
567 | { | ||
568 | int host_level, level, max_level; | ||
565 | 569 | ||
566 | host_level = host_mapping_level(vcpu->kvm, large_gfn); | 570 | host_level = host_mapping_level(vcpu->kvm, large_gfn); |
567 | 571 | ||
@@ -941,6 +945,35 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, | |||
941 | return young; | 945 | return young; |
942 | } | 946 | } |
943 | 947 | ||
948 | static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp, | ||
949 | unsigned long data) | ||
950 | { | ||
951 | u64 *spte; | ||
952 | int young = 0; | ||
953 | |||
954 | /* | ||
955 | * If there's no access bit in the secondary pte set by the | ||
956 | * hardware it's up to gup-fast/gup to set the access bit in | ||
957 | * the primary pte or in the page structure. | ||
958 | */ | ||
959 | if (!shadow_accessed_mask) | ||
960 | goto out; | ||
961 | |||
962 | spte = rmap_next(kvm, rmapp, NULL); | ||
963 | while (spte) { | ||
964 | u64 _spte = *spte; | ||
965 | BUG_ON(!(_spte & PT_PRESENT_MASK)); | ||
966 | young = _spte & PT_ACCESSED_MASK; | ||
967 | if (young) { | ||
968 | young = 1; | ||
969 | break; | ||
970 | } | ||
971 | spte = rmap_next(kvm, rmapp, spte); | ||
972 | } | ||
973 | out: | ||
974 | return young; | ||
975 | } | ||
976 | |||
944 | #define RMAP_RECYCLE_THRESHOLD 1000 | 977 | #define RMAP_RECYCLE_THRESHOLD 1000 |
945 | 978 | ||
946 | static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn) | 979 | static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn) |
@@ -961,6 +994,11 @@ int kvm_age_hva(struct kvm *kvm, unsigned long hva) | |||
961 | return kvm_handle_hva(kvm, hva, 0, kvm_age_rmapp); | 994 | return kvm_handle_hva(kvm, hva, 0, kvm_age_rmapp); |
962 | } | 995 | } |
963 | 996 | ||
997 | int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) | ||
998 | { | ||
999 | return kvm_handle_hva(kvm, hva, 0, kvm_test_age_rmapp); | ||
1000 | } | ||
1001 | |||
964 | #ifdef MMU_DEBUG | 1002 | #ifdef MMU_DEBUG |
965 | static int is_empty_shadow_page(u64 *spt) | 1003 | static int is_empty_shadow_page(u64 *spt) |
966 | { | 1004 | { |
@@ -2281,6 +2319,48 @@ static int kvm_handle_bad_page(struct kvm *kvm, gfn_t gfn, pfn_t pfn) | |||
2281 | return 1; | 2319 | return 1; |
2282 | } | 2320 | } |
2283 | 2321 | ||
2322 | static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu, | ||
2323 | gfn_t *gfnp, pfn_t *pfnp, int *levelp) | ||
2324 | { | ||
2325 | pfn_t pfn = *pfnp; | ||
2326 | gfn_t gfn = *gfnp; | ||
2327 | int level = *levelp; | ||
2328 | |||
2329 | /* | ||
2330 | * Check if it's a transparent hugepage. If this would be an | ||
2331 | * hugetlbfs page, level wouldn't be set to | ||
2332 | * PT_PAGE_TABLE_LEVEL and there would be no adjustment done | ||
2333 | * here. | ||
2334 | */ | ||
2335 | if (!is_error_pfn(pfn) && !kvm_is_mmio_pfn(pfn) && | ||
2336 | level == PT_PAGE_TABLE_LEVEL && | ||
2337 | PageTransCompound(pfn_to_page(pfn)) && | ||
2338 | !has_wrprotected_page(vcpu->kvm, gfn, PT_DIRECTORY_LEVEL)) { | ||
2339 | unsigned long mask; | ||
2340 | /* | ||
2341 | * mmu_notifier_retry was successful and we hold the | ||
2342 | * mmu_lock here, so the pmd can't become splitting | ||
2343 | * from under us, and in turn | ||
2344 | * __split_huge_page_refcount() can't run from under | ||
2345 | * us and we can safely transfer the refcount from | ||
2346 | * PG_tail to PG_head as we switch the pfn to tail to | ||
2347 | * head. | ||
2348 | */ | ||
2349 | *levelp = level = PT_DIRECTORY_LEVEL; | ||
2350 | mask = KVM_PAGES_PER_HPAGE(level) - 1; | ||
2351 | VM_BUG_ON((gfn & mask) != (pfn & mask)); | ||
2352 | if (pfn & mask) { | ||
2353 | gfn &= ~mask; | ||
2354 | *gfnp = gfn; | ||
2355 | kvm_release_pfn_clean(pfn); | ||
2356 | pfn &= ~mask; | ||
2357 | if (!get_page_unless_zero(pfn_to_page(pfn))) | ||
2358 | BUG(); | ||
2359 | *pfnp = pfn; | ||
2360 | } | ||
2361 | } | ||
2362 | } | ||
2363 | |||
2284 | static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, | 2364 | static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, |
2285 | gva_t gva, pfn_t *pfn, bool write, bool *writable); | 2365 | gva_t gva, pfn_t *pfn, bool write, bool *writable); |
2286 | 2366 | ||
@@ -2289,20 +2369,25 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn, | |||
2289 | { | 2369 | { |
2290 | int r; | 2370 | int r; |
2291 | int level; | 2371 | int level; |
2372 | int force_pt_level; | ||
2292 | pfn_t pfn; | 2373 | pfn_t pfn; |
2293 | unsigned long mmu_seq; | 2374 | unsigned long mmu_seq; |
2294 | bool map_writable; | 2375 | bool map_writable; |
2295 | 2376 | ||
2296 | level = mapping_level(vcpu, gfn); | 2377 | force_pt_level = mapping_level_dirty_bitmap(vcpu, gfn); |
2297 | 2378 | if (likely(!force_pt_level)) { | |
2298 | /* | 2379 | level = mapping_level(vcpu, gfn); |
2299 | * This path builds a PAE pagetable - so we can map 2mb pages at | 2380 | /* |
2300 | * maximum. Therefore check if the level is larger than that. | 2381 | * This path builds a PAE pagetable - so we can map |
2301 | */ | 2382 | * 2mb pages at maximum. Therefore check if the level |
2302 | if (level > PT_DIRECTORY_LEVEL) | 2383 | * is larger than that. |
2303 | level = PT_DIRECTORY_LEVEL; | 2384 | */ |
2385 | if (level > PT_DIRECTORY_LEVEL) | ||
2386 | level = PT_DIRECTORY_LEVEL; | ||
2304 | 2387 | ||
2305 | gfn &= ~(KVM_PAGES_PER_HPAGE(level) - 1); | 2388 | gfn &= ~(KVM_PAGES_PER_HPAGE(level) - 1); |
2389 | } else | ||
2390 | level = PT_PAGE_TABLE_LEVEL; | ||
2306 | 2391 | ||
2307 | mmu_seq = vcpu->kvm->mmu_notifier_seq; | 2392 | mmu_seq = vcpu->kvm->mmu_notifier_seq; |
2308 | smp_rmb(); | 2393 | smp_rmb(); |
@@ -2318,6 +2403,8 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn, | |||
2318 | if (mmu_notifier_retry(vcpu, mmu_seq)) | 2403 | if (mmu_notifier_retry(vcpu, mmu_seq)) |
2319 | goto out_unlock; | 2404 | goto out_unlock; |
2320 | kvm_mmu_free_some_pages(vcpu); | 2405 | kvm_mmu_free_some_pages(vcpu); |
2406 | if (likely(!force_pt_level)) | ||
2407 | transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level); | ||
2321 | r = __direct_map(vcpu, v, write, map_writable, level, gfn, pfn, | 2408 | r = __direct_map(vcpu, v, write, map_writable, level, gfn, pfn, |
2322 | prefault); | 2409 | prefault); |
2323 | spin_unlock(&vcpu->kvm->mmu_lock); | 2410 | spin_unlock(&vcpu->kvm->mmu_lock); |
@@ -2655,6 +2742,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code, | |||
2655 | pfn_t pfn; | 2742 | pfn_t pfn; |
2656 | int r; | 2743 | int r; |
2657 | int level; | 2744 | int level; |
2745 | int force_pt_level; | ||
2658 | gfn_t gfn = gpa >> PAGE_SHIFT; | 2746 | gfn_t gfn = gpa >> PAGE_SHIFT; |
2659 | unsigned long mmu_seq; | 2747 | unsigned long mmu_seq; |
2660 | int write = error_code & PFERR_WRITE_MASK; | 2748 | int write = error_code & PFERR_WRITE_MASK; |
@@ -2667,9 +2755,12 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code, | |||
2667 | if (r) | 2755 | if (r) |
2668 | return r; | 2756 | return r; |
2669 | 2757 | ||
2670 | level = mapping_level(vcpu, gfn); | 2758 | force_pt_level = mapping_level_dirty_bitmap(vcpu, gfn); |
2671 | 2759 | if (likely(!force_pt_level)) { | |
2672 | gfn &= ~(KVM_PAGES_PER_HPAGE(level) - 1); | 2760 | level = mapping_level(vcpu, gfn); |
2761 | gfn &= ~(KVM_PAGES_PER_HPAGE(level) - 1); | ||
2762 | } else | ||
2763 | level = PT_PAGE_TABLE_LEVEL; | ||
2673 | 2764 | ||
2674 | mmu_seq = vcpu->kvm->mmu_notifier_seq; | 2765 | mmu_seq = vcpu->kvm->mmu_notifier_seq; |
2675 | smp_rmb(); | 2766 | smp_rmb(); |
@@ -2684,6 +2775,8 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code, | |||
2684 | if (mmu_notifier_retry(vcpu, mmu_seq)) | 2775 | if (mmu_notifier_retry(vcpu, mmu_seq)) |
2685 | goto out_unlock; | 2776 | goto out_unlock; |
2686 | kvm_mmu_free_some_pages(vcpu); | 2777 | kvm_mmu_free_some_pages(vcpu); |
2778 | if (likely(!force_pt_level)) | ||
2779 | transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level); | ||
2687 | r = __direct_map(vcpu, gpa, write, map_writable, | 2780 | r = __direct_map(vcpu, gpa, write, map_writable, |
2688 | level, gfn, pfn, prefault); | 2781 | level, gfn, pfn, prefault); |
2689 | spin_unlock(&vcpu->kvm->mmu_lock); | 2782 | spin_unlock(&vcpu->kvm->mmu_lock); |
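Editor's note: the fault paths now decide up front, via mapping_level_dirty_bitmap(), whether a huge mapping is allowed at all; only when it is do they pick a large level and later let transparent_hugepage_adjust() promote a THP-backed 4k pfn to a 2MB mapping under mmu_lock. A simplified sketch of the force_pt_level decision; names mirror the hunk but the logic is condensed and is not the KVM implementation:

    #include <stdbool.h>
    #include <stdio.h>

    enum { PT_PAGE_TABLE_LEVEL = 1, PT_DIRECTORY_LEVEL = 2 };

    static int pick_level(bool force_pt_level, int host_level)
    {
            int level;

            if (!force_pt_level) {
                    level = host_level;
                    if (level > PT_DIRECTORY_LEVEL)
                            level = PT_DIRECTORY_LEVEL; /* PAE path: 2MB max */
            } else {
                    level = PT_PAGE_TABLE_LEVEL;        /* dirty logging: 4k only */
            }
            return level;
    }

    int main(void)
    {
            /* A slot with a dirty bitmap forces 4k; otherwise clamp to 2MB. */
            printf("%d %d\n", pick_level(true, 3), pick_level(false, 3));
            return 0;
    }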
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 53210f1e94c..6bccc24c418 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h | |||
@@ -550,6 +550,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, | |||
550 | int r; | 550 | int r; |
551 | pfn_t pfn; | 551 | pfn_t pfn; |
552 | int level = PT_PAGE_TABLE_LEVEL; | 552 | int level = PT_PAGE_TABLE_LEVEL; |
553 | int force_pt_level; | ||
553 | unsigned long mmu_seq; | 554 | unsigned long mmu_seq; |
554 | bool map_writable; | 555 | bool map_writable; |
555 | 556 | ||
@@ -577,7 +578,11 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, | |||
577 | return 0; | 578 | return 0; |
578 | } | 579 | } |
579 | 580 | ||
580 | if (walker.level >= PT_DIRECTORY_LEVEL) { | 581 | if (walker.level >= PT_DIRECTORY_LEVEL) |
582 | force_pt_level = mapping_level_dirty_bitmap(vcpu, walker.gfn); | ||
583 | else | ||
584 | force_pt_level = 1; | ||
585 | if (!force_pt_level) { | ||
581 | level = min(walker.level, mapping_level(vcpu, walker.gfn)); | 586 | level = min(walker.level, mapping_level(vcpu, walker.gfn)); |
582 | walker.gfn = walker.gfn & ~(KVM_PAGES_PER_HPAGE(level) - 1); | 587 | walker.gfn = walker.gfn & ~(KVM_PAGES_PER_HPAGE(level) - 1); |
583 | } | 588 | } |
@@ -599,6 +604,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, | |||
599 | 604 | ||
600 | trace_kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT); | 605 | trace_kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT); |
601 | kvm_mmu_free_some_pages(vcpu); | 606 | kvm_mmu_free_some_pages(vcpu); |
607 | if (!force_pt_level) | ||
608 | transparent_hugepage_adjust(vcpu, &walker.gfn, &pfn, &level); | ||
602 | sptep = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault, | 609 | sptep = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault, |
603 | level, &write_pt, pfn, map_writable, prefault); | 610 | level, &write_pt, pfn, map_writable, prefault); |
604 | (void)sptep; | 611 | (void)sptep; |
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 25bd1bc5aad..54ce246a383 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c | |||
@@ -1150,8 +1150,8 @@ static void svm_vcpu_put(struct kvm_vcpu *vcpu) | |||
1150 | kvm_load_ldt(svm->host.ldt); | 1150 | kvm_load_ldt(svm->host.ldt); |
1151 | #ifdef CONFIG_X86_64 | 1151 | #ifdef CONFIG_X86_64 |
1152 | loadsegment(fs, svm->host.fs); | 1152 | loadsegment(fs, svm->host.fs); |
1153 | load_gs_index(svm->host.gs); | ||
1154 | wrmsrl(MSR_KERNEL_GS_BASE, current->thread.gs); | 1153 | wrmsrl(MSR_KERNEL_GS_BASE, current->thread.gs); |
1154 | load_gs_index(svm->host.gs); | ||
1155 | #else | 1155 | #else |
1156 | loadsegment(gs, svm->host.gs); | 1156 | loadsegment(gs, svm->host.gs); |
1157 | #endif | 1157 | #endif |
diff --git a/arch/x86/lguest/Kconfig b/arch/x86/lguest/Kconfig index 38718041efc..6e121a2a49e 100644 --- a/arch/x86/lguest/Kconfig +++ b/arch/x86/lguest/Kconfig | |||
@@ -2,6 +2,7 @@ config LGUEST_GUEST | |||
2 | bool "Lguest guest support" | 2 | bool "Lguest guest support" |
3 | select PARAVIRT | 3 | select PARAVIRT |
4 | depends on X86_32 | 4 | depends on X86_32 |
5 | select VIRTUALIZATION | ||
5 | select VIRTIO | 6 | select VIRTIO |
6 | select VIRTIO_RING | 7 | select VIRTIO_RING |
7 | select VIRTIO_CONSOLE | 8 | select VIRTIO_CONSOLE |
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 4996cf5f73a..eba687f0cc0 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c | |||
@@ -824,7 +824,7 @@ static void __init lguest_init_IRQ(void) | |||
824 | 824 | ||
825 | for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) { | 825 | for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) { |
826 | /* Some systems map "vectors" to interrupts weirdly. Not us! */ | 826 | /* Some systems map "vectors" to interrupts weirdly. Not us! */ |
827 | __get_cpu_var(vector_irq)[i] = i - FIRST_EXTERNAL_VECTOR; | 827 | __this_cpu_write(vector_irq[i], i - FIRST_EXTERNAL_VECTOR); |
828 | if (i != SYSCALL_VECTOR) | 828 | if (i != SYSCALL_VECTOR) |
829 | set_intr_gate(i, interrupt[i - FIRST_EXTERNAL_VECTOR]); | 829 | set_intr_gate(i, interrupt[i - FIRST_EXTERNAL_VECTOR]); |
830 | } | 830 | } |
diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c index 738e6593799..dbe34b93137 100644 --- a/arch/x86/mm/gup.c +++ b/arch/x86/mm/gup.c | |||
@@ -8,6 +8,7 @@ | |||
8 | #include <linux/mm.h> | 8 | #include <linux/mm.h> |
9 | #include <linux/vmstat.h> | 9 | #include <linux/vmstat.h> |
10 | #include <linux/highmem.h> | 10 | #include <linux/highmem.h> |
11 | #include <linux/swap.h> | ||
11 | 12 | ||
12 | #include <asm/pgtable.h> | 13 | #include <asm/pgtable.h> |
13 | 14 | ||
@@ -89,6 +90,7 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr, | |||
89 | VM_BUG_ON(!pfn_valid(pte_pfn(pte))); | 90 | VM_BUG_ON(!pfn_valid(pte_pfn(pte))); |
90 | page = pte_page(pte); | 91 | page = pte_page(pte); |
91 | get_page(page); | 92 | get_page(page); |
93 | SetPageReferenced(page); | ||
92 | pages[*nr] = page; | 94 | pages[*nr] = page; |
93 | (*nr)++; | 95 | (*nr)++; |
94 | 96 | ||
@@ -103,6 +105,17 @@ static inline void get_head_page_multiple(struct page *page, int nr) | |||
103 | VM_BUG_ON(page != compound_head(page)); | 105 | VM_BUG_ON(page != compound_head(page)); |
104 | VM_BUG_ON(page_count(page) == 0); | 106 | VM_BUG_ON(page_count(page) == 0); |
105 | atomic_add(nr, &page->_count); | 107 | atomic_add(nr, &page->_count); |
108 | SetPageReferenced(page); | ||
109 | } | ||
110 | |||
111 | static inline void get_huge_page_tail(struct page *page) | ||
112 | { | ||
113 | /* | ||
114 | * __split_huge_page_refcount() cannot run | ||
115 | * from under us. | ||
116 | */ | ||
117 | VM_BUG_ON(atomic_read(&page->_count) < 0); | ||
118 | atomic_inc(&page->_count); | ||
106 | } | 119 | } |
107 | 120 | ||
108 | static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr, | 121 | static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr, |
@@ -128,6 +141,8 @@ static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr, | |||
128 | do { | 141 | do { |
129 | VM_BUG_ON(compound_head(page) != head); | 142 | VM_BUG_ON(compound_head(page) != head); |
130 | pages[*nr] = page; | 143 | pages[*nr] = page; |
144 | if (PageTail(page)) | ||
145 | get_huge_page_tail(page); | ||
131 | (*nr)++; | 146 | (*nr)++; |
132 | page++; | 147 | page++; |
133 | refs++; | 148 | refs++; |
@@ -148,7 +163,18 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, | |||
148 | pmd_t pmd = *pmdp; | 163 | pmd_t pmd = *pmdp; |
149 | 164 | ||
150 | next = pmd_addr_end(addr, end); | 165 | next = pmd_addr_end(addr, end); |
151 | if (pmd_none(pmd)) | 166 | /* |
167 | * The pmd_trans_splitting() check below explains why | ||
168 | * pmdp_splitting_flush has to flush the tlb, to stop | ||
169 | * this gup-fast code from running while we set the | ||
170 | * splitting bit in the pmd. Returning zero will take | ||
171 | * the slow path that will call wait_split_huge_page() | ||
172 | * if the pmd is still in splitting state. gup-fast | ||
173 | * can't because it has irq disabled and | ||
174 | * wait_split_huge_page() would never return as the | ||
175 | * tlb flush IPI wouldn't run. | ||
176 | */ | ||
177 | if (pmd_none(pmd) || pmd_trans_splitting(pmd)) | ||
152 | return 0; | 178 | return 0; |
153 | if (unlikely(pmd_large(pmd))) { | 179 | if (unlikely(pmd_large(pmd))) { |
154 | if (!gup_huge_pmd(pmd, addr, next, write, pages, nr)) | 180 | if (!gup_huge_pmd(pmd, addr, next, write, pages, nr)) |
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index 787c52ca49c..ebf6d7887a3 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c | |||
@@ -2,6 +2,28 @@ | |||
2 | #include <linux/topology.h> | 2 | #include <linux/topology.h> |
3 | #include <linux/module.h> | 3 | #include <linux/module.h> |
4 | #include <linux/bootmem.h> | 4 | #include <linux/bootmem.h> |
5 | #include <asm/numa.h> | ||
6 | #include <asm/acpi.h> | ||
7 | |||
8 | int __initdata numa_off; | ||
9 | |||
10 | static __init int numa_setup(char *opt) | ||
11 | { | ||
12 | if (!opt) | ||
13 | return -EINVAL; | ||
14 | if (!strncmp(opt, "off", 3)) | ||
15 | numa_off = 1; | ||
16 | #ifdef CONFIG_NUMA_EMU | ||
17 | if (!strncmp(opt, "fake=", 5)) | ||
18 | numa_emu_cmdline(opt + 5); | ||
19 | #endif | ||
20 | #ifdef CONFIG_ACPI_NUMA | ||
21 | if (!strncmp(opt, "noacpi", 6)) | ||
22 | acpi_numa = -1; | ||
23 | #endif | ||
24 | return 0; | ||
25 | } | ||
26 | early_param("numa", numa_setup); | ||
5 | 27 | ||
6 | /* | 28 | /* |
7 | * Which logical CPUs are on which nodes | 29 | * Which logical CPUs are on which nodes |
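Editor's note: numa_setup() and the numa_off flag move from numa_64.c into the shared numa.c so 32-bit and 64-bit use one parser for the numa= boot option. A userspace analogue of that parser for illustration; the kernel version guards the fake= and noacpi branches with CONFIG_NUMA_EMU and CONFIG_ACPI_NUMA respectively:

    #include <stdio.h>
    #include <string.h>

    static int  numa_off_flag;
    static int  acpi_numa_flag;
    static char fake_arg[32];

    static int parse_numa(const char *opt)
    {
            if (!opt)
                    return -1;
            if (!strncmp(opt, "off", 3))
                    numa_off_flag = 1;
            if (!strncmp(opt, "fake=", 5))
                    snprintf(fake_arg, sizeof(fake_arg), "%s", opt + 5);
            if (!strncmp(opt, "noacpi", 6))
                    acpi_numa_flag = -1;
            return 0;
    }

    int main(void)
    {
            parse_numa("fake=4");
            parse_numa("off");
            printf("off=%d fake=%s acpi=%d\n",
                   numa_off_flag, fake_arg, acpi_numa_flag);
            return 0;
    }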
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index 1e72102e80c..95ea1551eeb 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c | |||
@@ -30,7 +30,6 @@ s16 apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = { | |||
30 | [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE | 30 | [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE |
31 | }; | 31 | }; |
32 | 32 | ||
33 | int numa_off __initdata; | ||
34 | static unsigned long __initdata nodemap_addr; | 33 | static unsigned long __initdata nodemap_addr; |
35 | static unsigned long __initdata nodemap_size; | 34 | static unsigned long __initdata nodemap_size; |
36 | 35 | ||
@@ -263,6 +262,11 @@ static struct bootnode nodes[MAX_NUMNODES] __initdata; | |||
263 | static struct bootnode physnodes[MAX_NUMNODES] __cpuinitdata; | 262 | static struct bootnode physnodes[MAX_NUMNODES] __cpuinitdata; |
264 | static char *cmdline __initdata; | 263 | static char *cmdline __initdata; |
265 | 264 | ||
265 | void __init numa_emu_cmdline(char *str) | ||
266 | { | ||
267 | cmdline = str; | ||
268 | } | ||
269 | |||
266 | static int __init setup_physnodes(unsigned long start, unsigned long end, | 270 | static int __init setup_physnodes(unsigned long start, unsigned long end, |
267 | int acpi, int amd) | 271 | int acpi, int amd) |
268 | { | 272 | { |
@@ -670,24 +674,6 @@ unsigned long __init numa_free_all_bootmem(void) | |||
670 | return pages; | 674 | return pages; |
671 | } | 675 | } |
672 | 676 | ||
673 | static __init int numa_setup(char *opt) | ||
674 | { | ||
675 | if (!opt) | ||
676 | return -EINVAL; | ||
677 | if (!strncmp(opt, "off", 3)) | ||
678 | numa_off = 1; | ||
679 | #ifdef CONFIG_NUMA_EMU | ||
680 | if (!strncmp(opt, "fake=", 5)) | ||
681 | cmdline = opt + 5; | ||
682 | #endif | ||
683 | #ifdef CONFIG_ACPI_NUMA | ||
684 | if (!strncmp(opt, "noacpi", 6)) | ||
685 | acpi_numa = -1; | ||
686 | #endif | ||
687 | return 0; | ||
688 | } | ||
689 | early_param("numa", numa_setup); | ||
690 | |||
691 | #ifdef CONFIG_NUMA | 677 | #ifdef CONFIG_NUMA |
692 | 678 | ||
693 | static __init int find_near_online_node(int node) | 679 | static __init int find_near_online_node(int node) |
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 8b830ca14ac..d343b3c81f3 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c | |||
@@ -256,7 +256,6 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address, | |||
256 | unsigned long pfn) | 256 | unsigned long pfn) |
257 | { | 257 | { |
258 | pgprot_t forbidden = __pgprot(0); | 258 | pgprot_t forbidden = __pgprot(0); |
259 | pgprot_t required = __pgprot(0); | ||
260 | 259 | ||
261 | /* | 260 | /* |
262 | * The BIOS area between 640k and 1Mb needs to be executable for | 261 | * The BIOS area between 640k and 1Mb needs to be executable for |
@@ -282,12 +281,6 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address, | |||
282 | if (within(pfn, __pa((unsigned long)__start_rodata) >> PAGE_SHIFT, | 281 | if (within(pfn, __pa((unsigned long)__start_rodata) >> PAGE_SHIFT, |
283 | __pa((unsigned long)__end_rodata) >> PAGE_SHIFT)) | 282 | __pa((unsigned long)__end_rodata) >> PAGE_SHIFT)) |
284 | pgprot_val(forbidden) |= _PAGE_RW; | 283 | pgprot_val(forbidden) |= _PAGE_RW; |
285 | /* | ||
286 | * .data and .bss should always be writable. | ||
287 | */ | ||
288 | if (within(address, (unsigned long)_sdata, (unsigned long)_edata) || | ||
289 | within(address, (unsigned long)__bss_start, (unsigned long)__bss_stop)) | ||
290 | pgprot_val(required) |= _PAGE_RW; | ||
291 | 284 | ||
292 | #if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA) | 285 | #if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA) |
293 | /* | 286 | /* |
@@ -327,7 +320,6 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address, | |||
327 | #endif | 320 | #endif |
328 | 321 | ||
329 | prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden)); | 322 | prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden)); |
330 | prot = __pgprot(pgprot_val(prot) | pgprot_val(required)); | ||
331 | 323 | ||
332 | return prot; | 324 | return prot; |
333 | } | 325 | } |
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 8be8c7d7bc8..500242d3c96 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c | |||
@@ -320,6 +320,25 @@ int ptep_set_access_flags(struct vm_area_struct *vma, | |||
320 | return changed; | 320 | return changed; |
321 | } | 321 | } |
322 | 322 | ||
323 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | ||
324 | int pmdp_set_access_flags(struct vm_area_struct *vma, | ||
325 | unsigned long address, pmd_t *pmdp, | ||
326 | pmd_t entry, int dirty) | ||
327 | { | ||
328 | int changed = !pmd_same(*pmdp, entry); | ||
329 | |||
330 | VM_BUG_ON(address & ~HPAGE_PMD_MASK); | ||
331 | |||
332 | if (changed && dirty) { | ||
333 | *pmdp = entry; | ||
334 | pmd_update_defer(vma->vm_mm, address, pmdp); | ||
335 | flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE); | ||
336 | } | ||
337 | |||
338 | return changed; | ||
339 | } | ||
340 | #endif | ||
341 | |||
323 | int ptep_test_and_clear_young(struct vm_area_struct *vma, | 342 | int ptep_test_and_clear_young(struct vm_area_struct *vma, |
324 | unsigned long addr, pte_t *ptep) | 343 | unsigned long addr, pte_t *ptep) |
325 | { | 344 | { |
@@ -335,6 +354,23 @@ int ptep_test_and_clear_young(struct vm_area_struct *vma, | |||
335 | return ret; | 354 | return ret; |
336 | } | 355 | } |
337 | 356 | ||
357 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | ||
358 | int pmdp_test_and_clear_young(struct vm_area_struct *vma, | ||
359 | unsigned long addr, pmd_t *pmdp) | ||
360 | { | ||
361 | int ret = 0; | ||
362 | |||
363 | if (pmd_young(*pmdp)) | ||
364 | ret = test_and_clear_bit(_PAGE_BIT_ACCESSED, | ||
365 | (unsigned long *)pmdp); | ||
366 | |||
367 | if (ret) | ||
368 | pmd_update(vma->vm_mm, addr, pmdp); | ||
369 | |||
370 | return ret; | ||
371 | } | ||
372 | #endif | ||
373 | |||
338 | int ptep_clear_flush_young(struct vm_area_struct *vma, | 374 | int ptep_clear_flush_young(struct vm_area_struct *vma, |
339 | unsigned long address, pte_t *ptep) | 375 | unsigned long address, pte_t *ptep) |
340 | { | 376 | { |
@@ -347,6 +383,36 @@ int ptep_clear_flush_young(struct vm_area_struct *vma, | |||
347 | return young; | 383 | return young; |
348 | } | 384 | } |
349 | 385 | ||
386 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | ||
387 | int pmdp_clear_flush_young(struct vm_area_struct *vma, | ||
388 | unsigned long address, pmd_t *pmdp) | ||
389 | { | ||
390 | int young; | ||
391 | |||
392 | VM_BUG_ON(address & ~HPAGE_PMD_MASK); | ||
393 | |||
394 | young = pmdp_test_and_clear_young(vma, address, pmdp); | ||
395 | if (young) | ||
396 | flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE); | ||
397 | |||
398 | return young; | ||
399 | } | ||
400 | |||
401 | void pmdp_splitting_flush(struct vm_area_struct *vma, | ||
402 | unsigned long address, pmd_t *pmdp) | ||
403 | { | ||
404 | int set; | ||
405 | VM_BUG_ON(address & ~HPAGE_PMD_MASK); | ||
406 | set = !test_and_set_bit(_PAGE_BIT_SPLITTING, | ||
407 | (unsigned long *)pmdp); | ||
408 | if (set) { | ||
409 | pmd_update(vma->vm_mm, address, pmdp); | ||
410 | /* need tlb flush only to serialize against gup-fast */ | ||
411 | flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE); | ||
412 | } | ||
413 | } | ||
414 | #endif | ||
415 | |||
350 | /** | 416 | /** |
351 | * reserve_top_address - reserves a hole in the top of kernel address space | 417 | * reserve_top_address - reserves a hole in the top of kernel address space |
352 | * @reserve - size of hole to reserve | 418 | * @reserve - size of hole to reserve |
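Editor's note: the new pmdp_* helpers mirror their pte counterparts one level up: test-and-clear the accessed (or set the splitting) bit, then flush the TLB over the huge-page range only when something actually changed; for pmdp_splitting_flush the flush exists purely to serialize against gup-fast, as its comment says. A generic sketch of that test-and-clear-then-flush shape, not the kernel code:

    #include <stdbool.h>
    #include <stdio.h>

    struct entry {
            bool accessed;
    };

    static void flush_range(const char *why)
    {
            printf("tlb flush (%s)\n", why);
    }

    /* Same shape as pmdp_clear_flush_young(): clear the bit, flush only
     * if it was set, and report whether the entry had been used. */
    static bool clear_flush_young(struct entry *e)
    {
            bool young = e->accessed;

            e->accessed = false;
            if (young)
                    flush_range("accessed bit was set");
            return young;
    }

    int main(void)
    {
            struct entry e = { .accessed = true };
            bool first  = clear_flush_young(&e);   /* flushes, returns 1 */
            bool second = clear_flush_young(&e);   /* no flush, returns 0 */

            printf("%d %d\n", first, second);
            return 0;
    }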
diff --git a/arch/x86/mm/srat_32.c b/arch/x86/mm/srat_32.c index f16434568a5..ae96e7b8051 100644 --- a/arch/x86/mm/srat_32.c +++ b/arch/x86/mm/srat_32.c | |||
@@ -59,7 +59,6 @@ static struct node_memory_chunk_s __initdata node_memory_chunk[MAXCHUNKS]; | |||
59 | static int __initdata num_memory_chunks; /* total number of memory chunks */ | 59 | static int __initdata num_memory_chunks; /* total number of memory chunks */ |
60 | static u8 __initdata apicid_to_pxm[MAX_APICID]; | 60 | static u8 __initdata apicid_to_pxm[MAX_APICID]; |
61 | 61 | ||
62 | int numa_off __initdata; | ||
63 | int acpi_numa __initdata; | 62 | int acpi_numa __initdata; |
64 | 63 | ||
65 | static __init void bad_srat(void) | 64 | static __init void bad_srat(void) |
diff --git a/arch/x86/pci/broadcom_bus.c b/arch/x86/pci/broadcom_bus.c index 0846a5bbbfb..ab8269b0da2 100644 --- a/arch/x86/pci/broadcom_bus.c +++ b/arch/x86/pci/broadcom_bus.c | |||
@@ -9,6 +9,7 @@ | |||
9 | * option) any later version. | 9 | * option) any later version. |
10 | */ | 10 | */ |
11 | 11 | ||
12 | #include <linux/acpi.h> | ||
12 | #include <linux/delay.h> | 13 | #include <linux/delay.h> |
13 | #include <linux/dmi.h> | 14 | #include <linux/dmi.h> |
14 | #include <linux/pci.h> | 15 | #include <linux/pci.h> |
@@ -25,12 +26,14 @@ static void __devinit cnb20le_res(struct pci_dev *dev) | |||
25 | u8 fbus, lbus; | 26 | u8 fbus, lbus; |
26 | int i; | 27 | int i; |
27 | 28 | ||
29 | #ifdef CONFIG_ACPI | ||
28 | /* | 30 | /* |
29 | * The x86_pci_root_bus_res_quirks() function already refuses to use | 31 | * We should get host bridge information from ACPI unless the BIOS |
30 | * this information if ACPI _CRS was used. Therefore, we don't bother | 32 | * doesn't support it. |
31 | * checking if ACPI is enabled, and just generate the information | ||
32 | * for both the ACPI _CRS and no ACPI cases. | ||
33 | */ | 33 | */ |
34 | if (acpi_os_get_root_pointer()) | ||
35 | return; | ||
36 | #endif | ||
34 | 37 | ||
35 | info = &pci_root_info[pci_root_num]; | 38 | info = &pci_root_info[pci_root_num]; |
36 | pci_root_num++; | 39 | pci_root_num++; |
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index f7c8a399978..5fe75026ecc 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c | |||
@@ -22,6 +22,7 @@ unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2 | | |||
22 | 22 | ||
23 | unsigned int pci_early_dump_regs; | 23 | unsigned int pci_early_dump_regs; |
24 | static int pci_bf_sort; | 24 | static int pci_bf_sort; |
25 | static int smbios_type_b1_flag; | ||
25 | int pci_routeirq; | 26 | int pci_routeirq; |
26 | int noioapicquirk; | 27 | int noioapicquirk; |
27 | #ifdef CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS | 28 | #ifdef CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS |
@@ -185,6 +186,39 @@ static int __devinit set_bf_sort(const struct dmi_system_id *d) | |||
185 | return 0; | 186 | return 0; |
186 | } | 187 | } |
187 | 188 | ||
189 | static void __devinit read_dmi_type_b1(const struct dmi_header *dm, | ||
190 | void *private_data) | ||
191 | { | ||
192 | u8 *d = (u8 *)dm + 4; | ||
193 | |||
194 | if (dm->type != 0xB1) | ||
195 | return; | ||
196 | switch (((*(u32 *)d) >> 9) & 0x03) { | ||
197 | case 0x00: | ||
198 | printk(KERN_INFO "dmi type 0xB1 record - unknown flag\n"); | ||
199 | break; | ||
200 | case 0x01: /* set pci=bfsort */ | ||
201 | smbios_type_b1_flag = 1; | ||
202 | break; | ||
203 | case 0x02: /* do not set pci=bfsort */ | ||
204 | smbios_type_b1_flag = 2; | ||
205 | break; | ||
206 | default: | ||
207 | break; | ||
208 | } | ||
209 | } | ||
210 | |||
211 | static int __devinit find_sort_method(const struct dmi_system_id *d) | ||
212 | { | ||
213 | dmi_walk(read_dmi_type_b1, NULL); | ||
214 | |||
215 | if (smbios_type_b1_flag == 1) { | ||
216 | set_bf_sort(d); | ||
217 | return 0; | ||
218 | } | ||
219 | return -1; | ||
220 | } | ||
221 | |||
188 | /* | 222 | /* |
189 | * Enable renumbering of PCI bus# ranges to reach all PCI busses (Cardbus) | 223 | * Enable renumbering of PCI bus# ranges to reach all PCI busses (Cardbus) |
190 | */ | 224 | */ |
@@ -213,6 +247,13 @@ static const struct dmi_system_id __devinitconst pciprobe_dmi_table[] = { | |||
213 | }, | 247 | }, |
214 | #endif /* __i386__ */ | 248 | #endif /* __i386__ */ |
215 | { | 249 | { |
250 | .callback = find_sort_method, | ||
251 | .ident = "Dell System", | ||
252 | .matches = { | ||
253 | DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc"), | ||
254 | }, | ||
255 | }, | ||
256 | { | ||
216 | .callback = set_bf_sort, | 257 | .callback = set_bf_sort, |
217 | .ident = "Dell PowerEdge 1950", | 258 | .ident = "Dell PowerEdge 1950", |
218 | .matches = { | 259 | .matches = { |
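Editor's note: on Dell systems the patch walks the DMI tables looking for the OEM type 0xB1 record and reads a two-bit hint (bits 9-10 of the dword following the 4-byte record header) that says whether pci=bfsort should be applied. A userspace-style decoder of just that field, mirroring read_dmi_type_b1(); the sample record bytes are made up:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* 0 = unknown, 1 = enable breadth-first sort, 2 = leave it off. */
    static int decode_type_b1(const uint8_t *record)
    {
            uint32_t dword;

            memcpy(&dword, record + 4, sizeof(dword));  /* skip 4-byte header */
            switch ((dword >> 9) & 0x03) {
            case 0x01:
                    return 1;       /* set pci=bfsort */
            case 0x02:
                    return 2;       /* do not set pci=bfsort */
            default:
                    return 0;
            }
    }

    int main(void)
    {
            uint8_t rec[8] = { 0xB1, 8, 0, 0, 0x00, 0x02, 0, 0 };

            /* dword = 0x200 on little-endian x86, so bits 9-10 decode to 1. */
            printf("%d\n", decode_type_b1(rec));
            return 0;
    }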
diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c index 9f9bfb705cf..87e6c832311 100644 --- a/arch/x86/pci/irq.c +++ b/arch/x86/pci/irq.c | |||
@@ -589,7 +589,8 @@ static __init int intel_router_probe(struct irq_router *r, struct pci_dev *route | |||
589 | case PCI_DEVICE_ID_INTEL_ICH10_1: | 589 | case PCI_DEVICE_ID_INTEL_ICH10_1: |
590 | case PCI_DEVICE_ID_INTEL_ICH10_2: | 590 | case PCI_DEVICE_ID_INTEL_ICH10_2: |
591 | case PCI_DEVICE_ID_INTEL_ICH10_3: | 591 | case PCI_DEVICE_ID_INTEL_ICH10_3: |
592 | case PCI_DEVICE_ID_INTEL_PATSBURG_LPC: | 592 | case PCI_DEVICE_ID_INTEL_PATSBURG_LPC_0: |
593 | case PCI_DEVICE_ID_INTEL_PATSBURG_LPC_1: | ||
593 | r->name = "PIIX/ICH"; | 594 | r->name = "PIIX/ICH"; |
594 | r->get = pirq_piix_get; | 595 | r->get = pirq_piix_get; |
595 | r->set = pirq_piix_set; | 596 | r->set = pirq_piix_set; |
diff --git a/arch/x86/platform/olpc/olpc-xo1.c b/arch/x86/platform/olpc/olpc-xo1.c index f5442c03abc..127775696d6 100644 --- a/arch/x86/platform/olpc/olpc-xo1.c +++ b/arch/x86/platform/olpc/olpc-xo1.c | |||
@@ -1,6 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | * Support for features of the OLPC XO-1 laptop | 2 | * Support for features of the OLPC XO-1 laptop |
3 | * | 3 | * |
4 | * Copyright (C) 2010 Andres Salomon <dilinger@queued.net> | ||
4 | * Copyright (C) 2010 One Laptop per Child | 5 | * Copyright (C) 2010 One Laptop per Child |
5 | * Copyright (C) 2006 Red Hat, Inc. | 6 | * Copyright (C) 2006 Red Hat, Inc. |
6 | * Copyright (C) 2006 Advanced Micro Devices, Inc. | 7 | * Copyright (C) 2006 Advanced Micro Devices, Inc. |
@@ -12,8 +13,6 @@ | |||
12 | */ | 13 | */ |
13 | 14 | ||
14 | #include <linux/module.h> | 15 | #include <linux/module.h> |
15 | #include <linux/pci.h> | ||
16 | #include <linux/pci_ids.h> | ||
17 | #include <linux/platform_device.h> | 16 | #include <linux/platform_device.h> |
18 | #include <linux/pm.h> | 17 | #include <linux/pm.h> |
19 | 18 | ||
@@ -22,9 +21,6 @@ | |||
22 | 21 | ||
23 | #define DRV_NAME "olpc-xo1" | 22 | #define DRV_NAME "olpc-xo1" |
24 | 23 | ||
25 | #define PMS_BAR 4 | ||
26 | #define ACPI_BAR 5 | ||
27 | |||
28 | /* PMC registers (PMS block) */ | 24 | /* PMC registers (PMS block) */ |
29 | #define PM_SCLK 0x10 | 25 | #define PM_SCLK 0x10 |
30 | #define PM_IN_SLPCTL 0x20 | 26 | #define PM_IN_SLPCTL 0x20 |
@@ -57,65 +53,67 @@ static void xo1_power_off(void) | |||
57 | outl(0x00002000, acpi_base + PM1_CNT); | 53 | outl(0x00002000, acpi_base + PM1_CNT); |
58 | } | 54 | } |
59 | 55 | ||
60 | /* Read the base addresses from the PCI BAR info */ | 56 | static int __devinit olpc_xo1_probe(struct platform_device *pdev) |
61 | static int __devinit setup_bases(struct pci_dev *pdev) | ||
62 | { | 57 | { |
63 | int r; | 58 | struct resource *res; |
64 | 59 | ||
65 | r = pci_enable_device_io(pdev); | 60 | /* don't run on non-XOs */ |
66 | if (r) { | 61 | if (!machine_is_olpc()) |
67 | dev_err(&pdev->dev, "can't enable device IO\n"); | 62 | return -ENODEV; |
68 | return r; | ||
69 | } | ||
70 | 63 | ||
71 | r = pci_request_region(pdev, ACPI_BAR, DRV_NAME); | 64 | res = platform_get_resource(pdev, IORESOURCE_IO, 0); |
72 | if (r) { | 65 | if (!res) { |
73 | dev_err(&pdev->dev, "can't alloc PCI BAR #%d\n", ACPI_BAR); | 66 | dev_err(&pdev->dev, "can't fetch device resource info\n"); |
74 | return r; | 67 | return -EIO; |
75 | } | 68 | } |
76 | 69 | ||
77 | r = pci_request_region(pdev, PMS_BAR, DRV_NAME); | 70 | if (!request_region(res->start, resource_size(res), DRV_NAME)) { |
78 | if (r) { | 71 | dev_err(&pdev->dev, "can't request region\n"); |
79 | dev_err(&pdev->dev, "can't alloc PCI BAR #%d\n", PMS_BAR); | 72 | return -EIO; |
80 | pci_release_region(pdev, ACPI_BAR); | ||
81 | return r; | ||
82 | } | 73 | } |
83 | 74 | ||
84 | acpi_base = pci_resource_start(pdev, ACPI_BAR); | 75 | if (strcmp(pdev->name, "cs5535-pms") == 0) |
85 | pms_base = pci_resource_start(pdev, PMS_BAR); | 76 | pms_base = res->start; |
77 | else if (strcmp(pdev->name, "cs5535-acpi") == 0) | ||
78 | acpi_base = res->start; | ||
79 | |||
80 | /* If we have both addresses, we can override the poweroff hook */ | ||
81 | if (pms_base && acpi_base) { | ||
82 | pm_power_off = xo1_power_off; | ||
83 | printk(KERN_INFO "OLPC XO-1 support registered\n"); | ||
84 | } | ||
86 | 85 | ||
87 | return 0; | 86 | return 0; |
88 | } | 87 | } |
89 | 88 | ||
90 | static int __devinit olpc_xo1_probe(struct platform_device *pdev) | 89 | static int __devexit olpc_xo1_remove(struct platform_device *pdev) |
91 | { | 90 | { |
92 | struct pci_dev *pcidev; | 91 | struct resource *r; |
93 | int r; | ||
94 | |||
95 | pcidev = pci_get_device(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CS5536_ISA, | ||
96 | NULL); | ||
97 | if (!pdev) | ||
98 | return -ENODEV; | ||
99 | |||
100 | r = setup_bases(pcidev); | ||
101 | if (r) | ||
102 | return r; | ||
103 | 92 | ||
104 | pm_power_off = xo1_power_off; | 93 | r = platform_get_resource(pdev, IORESOURCE_IO, 0); |
94 | release_region(r->start, resource_size(r)); | ||
105 | 95 | ||
106 | printk(KERN_INFO "OLPC XO-1 support registered\n"); | 96 | if (strcmp(pdev->name, "cs5535-pms") == 0) |
107 | return 0; | 97 | pms_base = 0; |
108 | } | 98 | else if (strcmp(pdev->name, "cs5535-acpi") == 0) |
99 | acpi_base = 0; | ||
109 | 100 | ||
110 | static int __devexit olpc_xo1_remove(struct platform_device *pdev) | ||
111 | { | ||
112 | pm_power_off = NULL; | 101 | pm_power_off = NULL; |
113 | return 0; | 102 | return 0; |
114 | } | 103 | } |
115 | 104 | ||
116 | static struct platform_driver olpc_xo1_driver = { | 105 | static struct platform_driver cs5535_pms_drv = { |
106 | .driver = { | ||
107 | .name = "cs5535-pms", | ||
108 | .owner = THIS_MODULE, | ||
109 | }, | ||
110 | .probe = olpc_xo1_probe, | ||
111 | .remove = __devexit_p(olpc_xo1_remove), | ||
112 | }; | ||
113 | |||
114 | static struct platform_driver cs5535_acpi_drv = { | ||
117 | .driver = { | 115 | .driver = { |
118 | .name = DRV_NAME, | 116 | .name = "cs5535-acpi", |
119 | .owner = THIS_MODULE, | 117 | .owner = THIS_MODULE, |
120 | }, | 118 | }, |
121 | .probe = olpc_xo1_probe, | 119 | .probe = olpc_xo1_probe, |
@@ -124,12 +122,23 @@ static struct platform_driver olpc_xo1_driver = { | |||
124 | 122 | ||
125 | static int __init olpc_xo1_init(void) | 123 | static int __init olpc_xo1_init(void) |
126 | { | 124 | { |
127 | return platform_driver_register(&olpc_xo1_driver); | 125 | int r; |
126 | |||
127 | r = platform_driver_register(&cs5535_pms_drv); | ||
128 | if (r) | ||
129 | return r; | ||
130 | |||
131 | r = platform_driver_register(&cs5535_acpi_drv); | ||
132 | if (r) | ||
133 | platform_driver_unregister(&cs5535_pms_drv); | ||
134 | |||
135 | return r; | ||
128 | } | 136 | } |
129 | 137 | ||
130 | static void __exit olpc_xo1_exit(void) | 138 | static void __exit olpc_xo1_exit(void) |
131 | { | 139 | { |
132 | platform_driver_unregister(&olpc_xo1_driver); | 140 | platform_driver_unregister(&cs5535_acpi_drv); |
141 | platform_driver_unregister(&cs5535_pms_drv); | ||
133 | } | 142 | } |
134 | 143 | ||
135 | MODULE_AUTHOR("Daniel Drake <dsd@laptop.org>"); | 144 | MODULE_AUTHOR("Daniel Drake <dsd@laptop.org>"); |
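The olpc-xo1.c rework above stops claiming the CS5536 ISA bridge's PCI BARs directly and instead binds two platform devices, "cs5535-pms" and "cs5535-acpi", through one shared probe routine that records whichever I/O base arrives and only installs the power-off hook once both are known. The userspace sketch below mimics that name-keyed dispatch; the fake_probe() helper and the port numbers are invented for illustration only.

#include <stdio.h>
#include <string.h>

static unsigned long pms_base, acpi_base;

/* Toy analogue of the shared probe: one handler serves both platform
 * devices and decides which base address to record from the name. */
static int fake_probe(const char *name, unsigned long io_start)
{
        if (strcmp(name, "cs5535-pms") == 0)
                pms_base = io_start;
        else if (strcmp(name, "cs5535-acpi") == 0)
                acpi_base = io_start;

        /* Only once both regions are known would xo1_power_off be hooked up. */
        if (pms_base && acpi_base)
                printf("both bases known: pms=%#lx acpi=%#lx\n",
                       pms_base, acpi_base);
        return 0;
}

int main(void)
{
        fake_probe("cs5535-pms", 0x1000);       /* example I/O ports only */
        fake_probe("cs5535-acpi", 0x9c00);
        return 0;
}

Splitting the driver this way also lets olpc_xo1_init() roll back cleanly: as the bottom of the hunk shows, it unregisters cs5535-pms again if registering cs5535-acpi fails.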
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile index 77938515891..17c565de3d6 100644 --- a/arch/x86/xen/Makefile +++ b/arch/x86/xen/Makefile | |||
@@ -12,7 +12,8 @@ CFLAGS_mmu.o := $(nostackp) | |||
12 | 12 | ||
13 | obj-y := enlighten.o setup.o multicalls.o mmu.o irq.o \ | 13 | obj-y := enlighten.o setup.o multicalls.o mmu.o irq.o \ |
14 | time.o xen-asm.o xen-asm_$(BITS).o \ | 14 | time.o xen-asm.o xen-asm_$(BITS).o \ |
15 | grant-table.o suspend.o platform-pci-unplug.o | 15 | grant-table.o suspend.o platform-pci-unplug.o \ |
16 | p2m.o | ||
16 | 17 | ||
17 | obj-$(CONFIG_SMP) += smp.o | 18 | obj-$(CONFIG_SMP) += smp.o |
18 | obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o | 19 | obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o |
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 7e8d3bc80af..50542efe45f 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c | |||
@@ -1194,7 +1194,7 @@ asmlinkage void __init xen_start_kernel(void) | |||
1194 | per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0]; | 1194 | per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0]; |
1195 | 1195 | ||
1196 | local_irq_disable(); | 1196 | local_irq_disable(); |
1197 | early_boot_irqs_off(); | 1197 | early_boot_irqs_disabled = true; |
1198 | 1198 | ||
1199 | memblock_init(); | 1199 | memblock_init(); |
1200 | 1200 | ||
diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c index 9d30105a0c4..6a6fe893964 100644 --- a/arch/x86/xen/irq.c +++ b/arch/x86/xen/irq.c | |||
@@ -126,7 +126,7 @@ static const struct pv_irq_ops xen_irq_ops __initdata = { | |||
126 | #endif | 126 | #endif |
127 | }; | 127 | }; |
128 | 128 | ||
129 | void __init xen_init_irq_ops() | 129 | void __init xen_init_irq_ops(void) |
130 | { | 130 | { |
131 | pv_irq_ops = xen_irq_ops; | 131 | pv_irq_ops = xen_irq_ops; |
132 | x86_init.irqs.intr_init = xen_init_IRQ; | 132 | x86_init.irqs.intr_init = xen_init_IRQ; |
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 44924e551fd..5e92b61ad57 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c | |||
@@ -173,371 +173,6 @@ DEFINE_PER_CPU(unsigned long, xen_current_cr3); /* actual vcpu cr3 */ | |||
173 | */ | 173 | */ |
174 | #define USER_LIMIT ((STACK_TOP_MAX + PGDIR_SIZE - 1) & PGDIR_MASK) | 174 | #define USER_LIMIT ((STACK_TOP_MAX + PGDIR_SIZE - 1) & PGDIR_MASK) |
175 | 175 | ||
176 | /* | ||
177 | * Xen leaves the responsibility for maintaining p2m mappings to the | ||
178 | * guests themselves, but it must also access and update the p2m array | ||
179 | * during suspend/resume when all the pages are reallocated. | ||
180 | * | ||
181 | * The p2m table is logically a flat array, but we implement it as a | ||
182 | * three-level tree to allow the address space to be sparse. | ||
183 | * | ||
184 | * Xen | ||
185 | * | | ||
186 | * p2m_top p2m_top_mfn | ||
187 | * / \ / \ | ||
188 | * p2m_mid p2m_mid p2m_mid_mfn p2m_mid_mfn | ||
189 | * / \ / \ / / | ||
190 | * p2m p2m p2m p2m p2m p2m p2m ... | ||
191 | * | ||
192 | * The p2m_mid_mfn pages are mapped by p2m_top_mfn_p. | ||
193 | * | ||
194 | * The p2m_top and p2m_top_mfn levels are limited to 1 page, so the | ||
195 | * maximum representable pseudo-physical address space is: | ||
196 | * P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE pages | ||
197 | * | ||
198 | * P2M_PER_PAGE depends on the architecture, as a mfn is always | ||
199 | * unsigned long (8 bytes on 64-bit, 4 bytes on 32), leading to | ||
200 | * 512 and 1024 entries respectively. | ||
201 | */ | ||
202 | |||
203 | unsigned long xen_max_p2m_pfn __read_mostly; | ||
204 | |||
205 | #define P2M_PER_PAGE (PAGE_SIZE / sizeof(unsigned long)) | ||
206 | #define P2M_MID_PER_PAGE (PAGE_SIZE / sizeof(unsigned long *)) | ||
207 | #define P2M_TOP_PER_PAGE (PAGE_SIZE / sizeof(unsigned long **)) | ||
208 | |||
209 | #define MAX_P2M_PFN (P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE) | ||
210 | |||
211 | /* Placeholders for holes in the address space */ | ||
212 | static RESERVE_BRK_ARRAY(unsigned long, p2m_missing, P2M_PER_PAGE); | ||
213 | static RESERVE_BRK_ARRAY(unsigned long *, p2m_mid_missing, P2M_MID_PER_PAGE); | ||
214 | static RESERVE_BRK_ARRAY(unsigned long, p2m_mid_missing_mfn, P2M_MID_PER_PAGE); | ||
215 | |||
216 | static RESERVE_BRK_ARRAY(unsigned long **, p2m_top, P2M_TOP_PER_PAGE); | ||
217 | static RESERVE_BRK_ARRAY(unsigned long, p2m_top_mfn, P2M_TOP_PER_PAGE); | ||
218 | static RESERVE_BRK_ARRAY(unsigned long *, p2m_top_mfn_p, P2M_TOP_PER_PAGE); | ||
219 | |||
220 | RESERVE_BRK(p2m_mid, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE))); | ||
221 | RESERVE_BRK(p2m_mid_mfn, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE))); | ||
222 | |||
223 | static inline unsigned p2m_top_index(unsigned long pfn) | ||
224 | { | ||
225 | BUG_ON(pfn >= MAX_P2M_PFN); | ||
226 | return pfn / (P2M_MID_PER_PAGE * P2M_PER_PAGE); | ||
227 | } | ||
228 | |||
229 | static inline unsigned p2m_mid_index(unsigned long pfn) | ||
230 | { | ||
231 | return (pfn / P2M_PER_PAGE) % P2M_MID_PER_PAGE; | ||
232 | } | ||
233 | |||
234 | static inline unsigned p2m_index(unsigned long pfn) | ||
235 | { | ||
236 | return pfn % P2M_PER_PAGE; | ||
237 | } | ||
238 | |||
239 | static void p2m_top_init(unsigned long ***top) | ||
240 | { | ||
241 | unsigned i; | ||
242 | |||
243 | for (i = 0; i < P2M_TOP_PER_PAGE; i++) | ||
244 | top[i] = p2m_mid_missing; | ||
245 | } | ||
246 | |||
247 | static void p2m_top_mfn_init(unsigned long *top) | ||
248 | { | ||
249 | unsigned i; | ||
250 | |||
251 | for (i = 0; i < P2M_TOP_PER_PAGE; i++) | ||
252 | top[i] = virt_to_mfn(p2m_mid_missing_mfn); | ||
253 | } | ||
254 | |||
255 | static void p2m_top_mfn_p_init(unsigned long **top) | ||
256 | { | ||
257 | unsigned i; | ||
258 | |||
259 | for (i = 0; i < P2M_TOP_PER_PAGE; i++) | ||
260 | top[i] = p2m_mid_missing_mfn; | ||
261 | } | ||
262 | |||
263 | static void p2m_mid_init(unsigned long **mid) | ||
264 | { | ||
265 | unsigned i; | ||
266 | |||
267 | for (i = 0; i < P2M_MID_PER_PAGE; i++) | ||
268 | mid[i] = p2m_missing; | ||
269 | } | ||
270 | |||
271 | static void p2m_mid_mfn_init(unsigned long *mid) | ||
272 | { | ||
273 | unsigned i; | ||
274 | |||
275 | for (i = 0; i < P2M_MID_PER_PAGE; i++) | ||
276 | mid[i] = virt_to_mfn(p2m_missing); | ||
277 | } | ||
278 | |||
279 | static void p2m_init(unsigned long *p2m) | ||
280 | { | ||
281 | unsigned i; | ||
282 | |||
283 | for (i = 0; i < P2M_MID_PER_PAGE; i++) | ||
284 | p2m[i] = INVALID_P2M_ENTRY; | ||
285 | } | ||
286 | |||
287 | /* | ||
288 | * Build the parallel p2m_top_mfn and p2m_mid_mfn structures | ||
289 | * | ||
290 | * This is called both at boot time, and after resuming from suspend: | ||
291 | * - At boot time we're called very early, and must use extend_brk() | ||
292 | * to allocate memory. | ||
293 | * | ||
294 | * - After resume we're called from within stop_machine, but the mfn | ||
295 | * tree should already be completely allocated. | ||
296 | */ | ||
297 | void xen_build_mfn_list_list(void) | ||
298 | { | ||
299 | unsigned long pfn; | ||
300 | |||
301 | /* Pre-initialize p2m_top_mfn to be completely missing */ | ||
302 | if (p2m_top_mfn == NULL) { | ||
303 | p2m_mid_missing_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
304 | p2m_mid_mfn_init(p2m_mid_missing_mfn); | ||
305 | |||
306 | p2m_top_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
307 | p2m_top_mfn_p_init(p2m_top_mfn_p); | ||
308 | |||
309 | p2m_top_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
310 | p2m_top_mfn_init(p2m_top_mfn); | ||
311 | } else { | ||
312 | /* Reinitialise, mfn's all change after migration */ | ||
313 | p2m_mid_mfn_init(p2m_mid_missing_mfn); | ||
314 | } | ||
315 | |||
316 | for (pfn = 0; pfn < xen_max_p2m_pfn; pfn += P2M_PER_PAGE) { | ||
317 | unsigned topidx = p2m_top_index(pfn); | ||
318 | unsigned mididx = p2m_mid_index(pfn); | ||
319 | unsigned long **mid; | ||
320 | unsigned long *mid_mfn_p; | ||
321 | |||
322 | mid = p2m_top[topidx]; | ||
323 | mid_mfn_p = p2m_top_mfn_p[topidx]; | ||
324 | |||
325 | /* Don't bother allocating any mfn mid levels if | ||
326 | * they're just missing, just update the stored mfn, | ||
327 | * since all could have changed over a migrate. | ||
328 | */ | ||
329 | if (mid == p2m_mid_missing) { | ||
330 | BUG_ON(mididx); | ||
331 | BUG_ON(mid_mfn_p != p2m_mid_missing_mfn); | ||
332 | p2m_top_mfn[topidx] = virt_to_mfn(p2m_mid_missing_mfn); | ||
333 | pfn += (P2M_MID_PER_PAGE - 1) * P2M_PER_PAGE; | ||
334 | continue; | ||
335 | } | ||
336 | |||
337 | if (mid_mfn_p == p2m_mid_missing_mfn) { | ||
338 | /* | ||
339 | * XXX boot-time only! We should never find | ||
340 | * missing parts of the mfn tree after | ||
341 | * runtime. extend_brk() will BUG if we call | ||
342 | * it too late. | ||
343 | */ | ||
344 | mid_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
345 | p2m_mid_mfn_init(mid_mfn_p); | ||
346 | |||
347 | p2m_top_mfn_p[topidx] = mid_mfn_p; | ||
348 | } | ||
349 | |||
350 | p2m_top_mfn[topidx] = virt_to_mfn(mid_mfn_p); | ||
351 | mid_mfn_p[mididx] = virt_to_mfn(mid[mididx]); | ||
352 | } | ||
353 | } | ||
354 | |||
355 | void xen_setup_mfn_list_list(void) | ||
356 | { | ||
357 | BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info); | ||
358 | |||
359 | HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list = | ||
360 | virt_to_mfn(p2m_top_mfn); | ||
361 | HYPERVISOR_shared_info->arch.max_pfn = xen_max_p2m_pfn; | ||
362 | } | ||
363 | |||
364 | /* Set up p2m_top to point to the domain-builder provided p2m pages */ | ||
365 | void __init xen_build_dynamic_phys_to_machine(void) | ||
366 | { | ||
367 | unsigned long *mfn_list = (unsigned long *)xen_start_info->mfn_list; | ||
368 | unsigned long max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages); | ||
369 | unsigned long pfn; | ||
370 | |||
371 | xen_max_p2m_pfn = max_pfn; | ||
372 | |||
373 | p2m_missing = extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
374 | p2m_init(p2m_missing); | ||
375 | |||
376 | p2m_mid_missing = extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
377 | p2m_mid_init(p2m_mid_missing); | ||
378 | |||
379 | p2m_top = extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
380 | p2m_top_init(p2m_top); | ||
381 | |||
382 | /* | ||
383 | * The domain builder gives us a pre-constructed p2m array in | ||
384 | * mfn_list for all the pages initially given to us, so we just | ||
385 | * need to graft that into our tree structure. | ||
386 | */ | ||
387 | for (pfn = 0; pfn < max_pfn; pfn += P2M_PER_PAGE) { | ||
388 | unsigned topidx = p2m_top_index(pfn); | ||
389 | unsigned mididx = p2m_mid_index(pfn); | ||
390 | |||
391 | if (p2m_top[topidx] == p2m_mid_missing) { | ||
392 | unsigned long **mid = extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
393 | p2m_mid_init(mid); | ||
394 | |||
395 | p2m_top[topidx] = mid; | ||
396 | } | ||
397 | |||
398 | p2m_top[topidx][mididx] = &mfn_list[pfn]; | ||
399 | } | ||
400 | } | ||
401 | |||
402 | unsigned long get_phys_to_machine(unsigned long pfn) | ||
403 | { | ||
404 | unsigned topidx, mididx, idx; | ||
405 | |||
406 | if (unlikely(pfn >= MAX_P2M_PFN)) | ||
407 | return INVALID_P2M_ENTRY; | ||
408 | |||
409 | topidx = p2m_top_index(pfn); | ||
410 | mididx = p2m_mid_index(pfn); | ||
411 | idx = p2m_index(pfn); | ||
412 | |||
413 | return p2m_top[topidx][mididx][idx]; | ||
414 | } | ||
415 | EXPORT_SYMBOL_GPL(get_phys_to_machine); | ||
416 | |||
417 | static void *alloc_p2m_page(void) | ||
418 | { | ||
419 | return (void *)__get_free_page(GFP_KERNEL | __GFP_REPEAT); | ||
420 | } | ||
421 | |||
422 | static void free_p2m_page(void *p) | ||
423 | { | ||
424 | free_page((unsigned long)p); | ||
425 | } | ||
426 | |||
427 | /* | ||
428 | * Fully allocate the p2m structure for a given pfn. We need to check | ||
429 | * that both the top and mid levels are allocated, and make sure the | ||
430 | * parallel mfn tree is kept in sync. We may race with other cpus, so | ||
431 | * the new pages are installed with cmpxchg; if we lose the race then | ||
432 | * simply free the page we allocated and use the one that's there. | ||
433 | */ | ||
434 | static bool alloc_p2m(unsigned long pfn) | ||
435 | { | ||
436 | unsigned topidx, mididx; | ||
437 | unsigned long ***top_p, **mid; | ||
438 | unsigned long *top_mfn_p, *mid_mfn; | ||
439 | |||
440 | topidx = p2m_top_index(pfn); | ||
441 | mididx = p2m_mid_index(pfn); | ||
442 | |||
443 | top_p = &p2m_top[topidx]; | ||
444 | mid = *top_p; | ||
445 | |||
446 | if (mid == p2m_mid_missing) { | ||
447 | /* Mid level is missing, allocate a new one */ | ||
448 | mid = alloc_p2m_page(); | ||
449 | if (!mid) | ||
450 | return false; | ||
451 | |||
452 | p2m_mid_init(mid); | ||
453 | |||
454 | if (cmpxchg(top_p, p2m_mid_missing, mid) != p2m_mid_missing) | ||
455 | free_p2m_page(mid); | ||
456 | } | ||
457 | |||
458 | top_mfn_p = &p2m_top_mfn[topidx]; | ||
459 | mid_mfn = p2m_top_mfn_p[topidx]; | ||
460 | |||
461 | BUG_ON(virt_to_mfn(mid_mfn) != *top_mfn_p); | ||
462 | |||
463 | if (mid_mfn == p2m_mid_missing_mfn) { | ||
464 | /* Separately check the mid mfn level */ | ||
465 | unsigned long missing_mfn; | ||
466 | unsigned long mid_mfn_mfn; | ||
467 | |||
468 | mid_mfn = alloc_p2m_page(); | ||
469 | if (!mid_mfn) | ||
470 | return false; | ||
471 | |||
472 | p2m_mid_mfn_init(mid_mfn); | ||
473 | |||
474 | missing_mfn = virt_to_mfn(p2m_mid_missing_mfn); | ||
475 | mid_mfn_mfn = virt_to_mfn(mid_mfn); | ||
476 | if (cmpxchg(top_mfn_p, missing_mfn, mid_mfn_mfn) != missing_mfn) | ||
477 | free_p2m_page(mid_mfn); | ||
478 | else | ||
479 | p2m_top_mfn_p[topidx] = mid_mfn; | ||
480 | } | ||
481 | |||
482 | if (p2m_top[topidx][mididx] == p2m_missing) { | ||
483 | /* p2m leaf page is missing */ | ||
484 | unsigned long *p2m; | ||
485 | |||
486 | p2m = alloc_p2m_page(); | ||
487 | if (!p2m) | ||
488 | return false; | ||
489 | |||
490 | p2m_init(p2m); | ||
491 | |||
492 | if (cmpxchg(&mid[mididx], p2m_missing, p2m) != p2m_missing) | ||
493 | free_p2m_page(p2m); | ||
494 | else | ||
495 | mid_mfn[mididx] = virt_to_mfn(p2m); | ||
496 | } | ||
497 | |||
498 | return true; | ||
499 | } | ||
500 | |||
501 | /* Try to install p2m mapping; fail if intermediate bits missing */ | ||
502 | bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn) | ||
503 | { | ||
504 | unsigned topidx, mididx, idx; | ||
505 | |||
506 | if (unlikely(pfn >= MAX_P2M_PFN)) { | ||
507 | BUG_ON(mfn != INVALID_P2M_ENTRY); | ||
508 | return true; | ||
509 | } | ||
510 | |||
511 | topidx = p2m_top_index(pfn); | ||
512 | mididx = p2m_mid_index(pfn); | ||
513 | idx = p2m_index(pfn); | ||
514 | |||
515 | if (p2m_top[topidx][mididx] == p2m_missing) | ||
516 | return mfn == INVALID_P2M_ENTRY; | ||
517 | |||
518 | p2m_top[topidx][mididx][idx] = mfn; | ||
519 | |||
520 | return true; | ||
521 | } | ||
522 | |||
523 | bool set_phys_to_machine(unsigned long pfn, unsigned long mfn) | ||
524 | { | ||
525 | if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) { | ||
526 | BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY); | ||
527 | return true; | ||
528 | } | ||
529 | |||
530 | if (unlikely(!__set_phys_to_machine(pfn, mfn))) { | ||
531 | if (!alloc_p2m(pfn)) | ||
532 | return false; | ||
533 | |||
534 | if (!__set_phys_to_machine(pfn, mfn)) | ||
535 | return false; | ||
536 | } | ||
537 | |||
538 | return true; | ||
539 | } | ||
540 | |||
541 | unsigned long arbitrary_virt_to_mfn(void *vaddr) | 176 | unsigned long arbitrary_virt_to_mfn(void *vaddr) |
542 | { | 177 | { |
543 | xmaddr_t maddr = arbitrary_virt_to_machine(vaddr); | 178 | xmaddr_t maddr = arbitrary_virt_to_machine(vaddr); |
@@ -566,6 +201,7 @@ xmaddr_t arbitrary_virt_to_machine(void *vaddr) | |||
566 | offset = address & ~PAGE_MASK; | 201 | offset = address & ~PAGE_MASK; |
567 | return XMADDR(((phys_addr_t)pte_mfn(*pte) << PAGE_SHIFT) + offset); | 202 | return XMADDR(((phys_addr_t)pte_mfn(*pte) << PAGE_SHIFT) + offset); |
568 | } | 203 | } |
204 | EXPORT_SYMBOL_GPL(arbitrary_virt_to_machine); | ||
569 | 205 | ||
570 | void make_lowmem_page_readonly(void *vaddr) | 206 | void make_lowmem_page_readonly(void *vaddr) |
571 | { | 207 | { |
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c new file mode 100644 index 00000000000..fd12d7ce7ff --- /dev/null +++ b/arch/x86/xen/p2m.c | |||
@@ -0,0 +1,522 @@ | |||
1 | /* | ||
2 | * Xen leaves the responsibility for maintaining p2m mappings to the | ||
3 | * guests themselves, but it must also access and update the p2m array | ||
4 | * during suspend/resume when all the pages are reallocated. | ||
5 | * | ||
6 | * The p2m table is logically a flat array, but we implement it as a | ||
7 | * three-level tree to allow the address space to be sparse. | ||
8 | * | ||
9 | * Xen | ||
10 | * | | ||
11 | * p2m_top p2m_top_mfn | ||
12 | * / \ / \ | ||
13 | * p2m_mid p2m_mid p2m_mid_mfn p2m_mid_mfn | ||
14 | * / \ / \ / / | ||
15 | * p2m p2m p2m p2m p2m p2m p2m ... | ||
16 | * | ||
17 | * The p2m_mid_mfn pages are mapped by p2m_top_mfn_p. | ||
18 | * | ||
19 | * The p2m_top and p2m_top_mfn levels are limited to 1 page, so the | ||
20 | * maximum representable pseudo-physical address space is: | ||
21 | * P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE pages | ||
22 | * | ||
23 | * P2M_PER_PAGE depends on the architecture, as a mfn is always | ||
24 | * unsigned long (8 bytes on 64-bit, 4 bytes on 32), leading to | ||
25 | * 512 and 1024 entries respectively. | ||
26 | */ | ||
27 | |||
28 | #include <linux/init.h> | ||
29 | #include <linux/module.h> | ||
30 | #include <linux/list.h> | ||
31 | #include <linux/hash.h> | ||
32 | #include <linux/sched.h> | ||
33 | |||
34 | #include <asm/cache.h> | ||
35 | #include <asm/setup.h> | ||
36 | |||
37 | #include <asm/xen/page.h> | ||
38 | #include <asm/xen/hypercall.h> | ||
39 | #include <asm/xen/hypervisor.h> | ||
40 | |||
41 | #include "xen-ops.h" | ||
42 | |||
43 | static void __init m2p_override_init(void); | ||
44 | |||
45 | unsigned long xen_max_p2m_pfn __read_mostly; | ||
46 | |||
47 | #define P2M_PER_PAGE (PAGE_SIZE / sizeof(unsigned long)) | ||
48 | #define P2M_MID_PER_PAGE (PAGE_SIZE / sizeof(unsigned long *)) | ||
49 | #define P2M_TOP_PER_PAGE (PAGE_SIZE / sizeof(unsigned long **)) | ||
50 | |||
51 | #define MAX_P2M_PFN (P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE) | ||
52 | |||
53 | /* Placeholders for holes in the address space */ | ||
54 | static RESERVE_BRK_ARRAY(unsigned long, p2m_missing, P2M_PER_PAGE); | ||
55 | static RESERVE_BRK_ARRAY(unsigned long *, p2m_mid_missing, P2M_MID_PER_PAGE); | ||
56 | static RESERVE_BRK_ARRAY(unsigned long, p2m_mid_missing_mfn, P2M_MID_PER_PAGE); | ||
57 | |||
58 | static RESERVE_BRK_ARRAY(unsigned long **, p2m_top, P2M_TOP_PER_PAGE); | ||
59 | static RESERVE_BRK_ARRAY(unsigned long, p2m_top_mfn, P2M_TOP_PER_PAGE); | ||
60 | static RESERVE_BRK_ARRAY(unsigned long *, p2m_top_mfn_p, P2M_TOP_PER_PAGE); | ||
61 | |||
62 | RESERVE_BRK(p2m_mid, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE))); | ||
63 | RESERVE_BRK(p2m_mid_mfn, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE))); | ||
64 | |||
65 | static inline unsigned p2m_top_index(unsigned long pfn) | ||
66 | { | ||
67 | BUG_ON(pfn >= MAX_P2M_PFN); | ||
68 | return pfn / (P2M_MID_PER_PAGE * P2M_PER_PAGE); | ||
69 | } | ||
70 | |||
71 | static inline unsigned p2m_mid_index(unsigned long pfn) | ||
72 | { | ||
73 | return (pfn / P2M_PER_PAGE) % P2M_MID_PER_PAGE; | ||
74 | } | ||
75 | |||
76 | static inline unsigned p2m_index(unsigned long pfn) | ||
77 | { | ||
78 | return pfn % P2M_PER_PAGE; | ||
79 | } | ||
80 | |||
81 | static void p2m_top_init(unsigned long ***top) | ||
82 | { | ||
83 | unsigned i; | ||
84 | |||
85 | for (i = 0; i < P2M_TOP_PER_PAGE; i++) | ||
86 | top[i] = p2m_mid_missing; | ||
87 | } | ||
88 | |||
89 | static void p2m_top_mfn_init(unsigned long *top) | ||
90 | { | ||
91 | unsigned i; | ||
92 | |||
93 | for (i = 0; i < P2M_TOP_PER_PAGE; i++) | ||
94 | top[i] = virt_to_mfn(p2m_mid_missing_mfn); | ||
95 | } | ||
96 | |||
97 | static void p2m_top_mfn_p_init(unsigned long **top) | ||
98 | { | ||
99 | unsigned i; | ||
100 | |||
101 | for (i = 0; i < P2M_TOP_PER_PAGE; i++) | ||
102 | top[i] = p2m_mid_missing_mfn; | ||
103 | } | ||
104 | |||
105 | static void p2m_mid_init(unsigned long **mid) | ||
106 | { | ||
107 | unsigned i; | ||
108 | |||
109 | for (i = 0; i < P2M_MID_PER_PAGE; i++) | ||
110 | mid[i] = p2m_missing; | ||
111 | } | ||
112 | |||
113 | static void p2m_mid_mfn_init(unsigned long *mid) | ||
114 | { | ||
115 | unsigned i; | ||
116 | |||
117 | for (i = 0; i < P2M_MID_PER_PAGE; i++) | ||
118 | mid[i] = virt_to_mfn(p2m_missing); | ||
119 | } | ||
120 | |||
121 | static void p2m_init(unsigned long *p2m) | ||
122 | { | ||
123 | unsigned i; | ||
124 | |||
125 | for (i = 0; i < P2M_MID_PER_PAGE; i++) | ||
126 | p2m[i] = INVALID_P2M_ENTRY; | ||
127 | } | ||
128 | |||
129 | /* | ||
130 | * Build the parallel p2m_top_mfn and p2m_mid_mfn structures | ||
131 | * | ||
132 | * This is called both at boot time, and after resuming from suspend: | ||
133 | * - At boot time we're called very early, and must use extend_brk() | ||
134 | * to allocate memory. | ||
135 | * | ||
136 | * - After resume we're called from within stop_machine, but the mfn | ||
137 | * tree should already be completely allocated. | ||
138 | */ | ||
139 | void xen_build_mfn_list_list(void) | ||
140 | { | ||
141 | unsigned long pfn; | ||
142 | |||
143 | /* Pre-initialize p2m_top_mfn to be completely missing */ | ||
144 | if (p2m_top_mfn == NULL) { | ||
145 | p2m_mid_missing_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
146 | p2m_mid_mfn_init(p2m_mid_missing_mfn); | ||
147 | |||
148 | p2m_top_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
149 | p2m_top_mfn_p_init(p2m_top_mfn_p); | ||
150 | |||
151 | p2m_top_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
152 | p2m_top_mfn_init(p2m_top_mfn); | ||
153 | } else { | ||
154 | /* Reinitialise, mfn's all change after migration */ | ||
155 | p2m_mid_mfn_init(p2m_mid_missing_mfn); | ||
156 | } | ||
157 | |||
158 | for (pfn = 0; pfn < xen_max_p2m_pfn; pfn += P2M_PER_PAGE) { | ||
159 | unsigned topidx = p2m_top_index(pfn); | ||
160 | unsigned mididx = p2m_mid_index(pfn); | ||
161 | unsigned long **mid; | ||
162 | unsigned long *mid_mfn_p; | ||
163 | |||
164 | mid = p2m_top[topidx]; | ||
165 | mid_mfn_p = p2m_top_mfn_p[topidx]; | ||
166 | |||
167 | /* Don't bother allocating any mfn mid levels if | ||
168 | * they're just missing, just update the stored mfn, | ||
169 | * since all could have changed over a migrate. | ||
170 | */ | ||
171 | if (mid == p2m_mid_missing) { | ||
172 | BUG_ON(mididx); | ||
173 | BUG_ON(mid_mfn_p != p2m_mid_missing_mfn); | ||
174 | p2m_top_mfn[topidx] = virt_to_mfn(p2m_mid_missing_mfn); | ||
175 | pfn += (P2M_MID_PER_PAGE - 1) * P2M_PER_PAGE; | ||
176 | continue; | ||
177 | } | ||
178 | |||
179 | if (mid_mfn_p == p2m_mid_missing_mfn) { | ||
180 | /* | ||
181 | * XXX boot-time only! We should never find | ||
182 | * missing parts of the mfn tree after | ||
183 | * runtime. extend_brk() will BUG if we call | ||
184 | * it too late. | ||
185 | */ | ||
186 | mid_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
187 | p2m_mid_mfn_init(mid_mfn_p); | ||
188 | |||
189 | p2m_top_mfn_p[topidx] = mid_mfn_p; | ||
190 | } | ||
191 | |||
192 | p2m_top_mfn[topidx] = virt_to_mfn(mid_mfn_p); | ||
193 | mid_mfn_p[mididx] = virt_to_mfn(mid[mididx]); | ||
194 | } | ||
195 | } | ||
196 | |||
197 | void xen_setup_mfn_list_list(void) | ||
198 | { | ||
199 | BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info); | ||
200 | |||
201 | HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list = | ||
202 | virt_to_mfn(p2m_top_mfn); | ||
203 | HYPERVISOR_shared_info->arch.max_pfn = xen_max_p2m_pfn; | ||
204 | } | ||
205 | |||
206 | /* Set up p2m_top to point to the domain-builder provided p2m pages */ | ||
207 | void __init xen_build_dynamic_phys_to_machine(void) | ||
208 | { | ||
209 | unsigned long *mfn_list = (unsigned long *)xen_start_info->mfn_list; | ||
210 | unsigned long max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages); | ||
211 | unsigned long pfn; | ||
212 | |||
213 | xen_max_p2m_pfn = max_pfn; | ||
214 | |||
215 | p2m_missing = extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
216 | p2m_init(p2m_missing); | ||
217 | |||
218 | p2m_mid_missing = extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
219 | p2m_mid_init(p2m_mid_missing); | ||
220 | |||
221 | p2m_top = extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
222 | p2m_top_init(p2m_top); | ||
223 | |||
224 | /* | ||
225 | * The domain builder gives us a pre-constructed p2m array in | ||
226 | * mfn_list for all the pages initially given to us, so we just | ||
227 | * need to graft that into our tree structure. | ||
228 | */ | ||
229 | for (pfn = 0; pfn < max_pfn; pfn += P2M_PER_PAGE) { | ||
230 | unsigned topidx = p2m_top_index(pfn); | ||
231 | unsigned mididx = p2m_mid_index(pfn); | ||
232 | |||
233 | if (p2m_top[topidx] == p2m_mid_missing) { | ||
234 | unsigned long **mid = extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
235 | p2m_mid_init(mid); | ||
236 | |||
237 | p2m_top[topidx] = mid; | ||
238 | } | ||
239 | |||
240 | /* | ||
241 | * As long as the mfn_list has enough entries to completely | ||
242 | * fill a p2m page, pointing into the array is ok. But if | ||
243 | * not, the entries beyond the last pfn will be undefined. | ||
244 | */ | ||
245 | if (unlikely(pfn + P2M_PER_PAGE > max_pfn)) { | ||
246 | unsigned long p2midx; | ||
247 | |||
248 | p2midx = max_pfn % P2M_PER_PAGE; | ||
249 | for ( ; p2midx < P2M_PER_PAGE; p2midx++) | ||
250 | mfn_list[pfn + p2midx] = INVALID_P2M_ENTRY; | ||
251 | } | ||
252 | p2m_top[topidx][mididx] = &mfn_list[pfn]; | ||
253 | } | ||
254 | |||
255 | m2p_override_init(); | ||
256 | } | ||
257 | |||
258 | unsigned long get_phys_to_machine(unsigned long pfn) | ||
259 | { | ||
260 | unsigned topidx, mididx, idx; | ||
261 | |||
262 | if (unlikely(pfn >= MAX_P2M_PFN)) | ||
263 | return INVALID_P2M_ENTRY; | ||
264 | |||
265 | topidx = p2m_top_index(pfn); | ||
266 | mididx = p2m_mid_index(pfn); | ||
267 | idx = p2m_index(pfn); | ||
268 | |||
269 | return p2m_top[topidx][mididx][idx]; | ||
270 | } | ||
271 | EXPORT_SYMBOL_GPL(get_phys_to_machine); | ||
272 | |||
273 | static void *alloc_p2m_page(void) | ||
274 | { | ||
275 | return (void *)__get_free_page(GFP_KERNEL | __GFP_REPEAT); | ||
276 | } | ||
277 | |||
278 | static void free_p2m_page(void *p) | ||
279 | { | ||
280 | free_page((unsigned long)p); | ||
281 | } | ||
282 | |||
283 | /* | ||
284 | * Fully allocate the p2m structure for a given pfn. We need to check | ||
285 | * that both the top and mid levels are allocated, and make sure the | ||
286 | * parallel mfn tree is kept in sync. We may race with other cpus, so | ||
287 | * the new pages are installed with cmpxchg; if we lose the race then | ||
288 | * simply free the page we allocated and use the one that's there. | ||
289 | */ | ||
290 | static bool alloc_p2m(unsigned long pfn) | ||
291 | { | ||
292 | unsigned topidx, mididx; | ||
293 | unsigned long ***top_p, **mid; | ||
294 | unsigned long *top_mfn_p, *mid_mfn; | ||
295 | |||
296 | topidx = p2m_top_index(pfn); | ||
297 | mididx = p2m_mid_index(pfn); | ||
298 | |||
299 | top_p = &p2m_top[topidx]; | ||
300 | mid = *top_p; | ||
301 | |||
302 | if (mid == p2m_mid_missing) { | ||
303 | /* Mid level is missing, allocate a new one */ | ||
304 | mid = alloc_p2m_page(); | ||
305 | if (!mid) | ||
306 | return false; | ||
307 | |||
308 | p2m_mid_init(mid); | ||
309 | |||
310 | if (cmpxchg(top_p, p2m_mid_missing, mid) != p2m_mid_missing) | ||
311 | free_p2m_page(mid); | ||
312 | } | ||
313 | |||
314 | top_mfn_p = &p2m_top_mfn[topidx]; | ||
315 | mid_mfn = p2m_top_mfn_p[topidx]; | ||
316 | |||
317 | BUG_ON(virt_to_mfn(mid_mfn) != *top_mfn_p); | ||
318 | |||
319 | if (mid_mfn == p2m_mid_missing_mfn) { | ||
320 | /* Separately check the mid mfn level */ | ||
321 | unsigned long missing_mfn; | ||
322 | unsigned long mid_mfn_mfn; | ||
323 | |||
324 | mid_mfn = alloc_p2m_page(); | ||
325 | if (!mid_mfn) | ||
326 | return false; | ||
327 | |||
328 | p2m_mid_mfn_init(mid_mfn); | ||
329 | |||
330 | missing_mfn = virt_to_mfn(p2m_mid_missing_mfn); | ||
331 | mid_mfn_mfn = virt_to_mfn(mid_mfn); | ||
332 | if (cmpxchg(top_mfn_p, missing_mfn, mid_mfn_mfn) != missing_mfn) | ||
333 | free_p2m_page(mid_mfn); | ||
334 | else | ||
335 | p2m_top_mfn_p[topidx] = mid_mfn; | ||
336 | } | ||
337 | |||
338 | if (p2m_top[topidx][mididx] == p2m_missing) { | ||
339 | /* p2m leaf page is missing */ | ||
340 | unsigned long *p2m; | ||
341 | |||
342 | p2m = alloc_p2m_page(); | ||
343 | if (!p2m) | ||
344 | return false; | ||
345 | |||
346 | p2m_init(p2m); | ||
347 | |||
348 | if (cmpxchg(&mid[mididx], p2m_missing, p2m) != p2m_missing) | ||
349 | free_p2m_page(p2m); | ||
350 | else | ||
351 | mid_mfn[mididx] = virt_to_mfn(p2m); | ||
352 | } | ||
353 | |||
354 | return true; | ||
355 | } | ||
356 | |||
357 | /* Try to install p2m mapping; fail if intermediate bits missing */ | ||
358 | bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn) | ||
359 | { | ||
360 | unsigned topidx, mididx, idx; | ||
361 | |||
362 | if (unlikely(pfn >= MAX_P2M_PFN)) { | ||
363 | BUG_ON(mfn != INVALID_P2M_ENTRY); | ||
364 | return true; | ||
365 | } | ||
366 | |||
367 | topidx = p2m_top_index(pfn); | ||
368 | mididx = p2m_mid_index(pfn); | ||
369 | idx = p2m_index(pfn); | ||
370 | |||
371 | if (p2m_top[topidx][mididx] == p2m_missing) | ||
372 | return mfn == INVALID_P2M_ENTRY; | ||
373 | |||
374 | p2m_top[topidx][mididx][idx] = mfn; | ||
375 | |||
376 | return true; | ||
377 | } | ||
378 | |||
379 | bool set_phys_to_machine(unsigned long pfn, unsigned long mfn) | ||
380 | { | ||
381 | if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) { | ||
382 | BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY); | ||
383 | return true; | ||
384 | } | ||
385 | |||
386 | if (unlikely(!__set_phys_to_machine(pfn, mfn))) { | ||
387 | if (!alloc_p2m(pfn)) | ||
388 | return false; | ||
389 | |||
390 | if (!__set_phys_to_machine(pfn, mfn)) | ||
391 | return false; | ||
392 | } | ||
393 | |||
394 | return true; | ||
395 | } | ||
396 | |||
397 | #define M2P_OVERRIDE_HASH_SHIFT 10 | ||
398 | #define M2P_OVERRIDE_HASH (1 << M2P_OVERRIDE_HASH_SHIFT) | ||
399 | |||
400 | static RESERVE_BRK_ARRAY(struct list_head, m2p_overrides, M2P_OVERRIDE_HASH); | ||
401 | static DEFINE_SPINLOCK(m2p_override_lock); | ||
402 | |||
403 | static void __init m2p_override_init(void) | ||
404 | { | ||
405 | unsigned i; | ||
406 | |||
407 | m2p_overrides = extend_brk(sizeof(*m2p_overrides) * M2P_OVERRIDE_HASH, | ||
408 | sizeof(unsigned long)); | ||
409 | |||
410 | for (i = 0; i < M2P_OVERRIDE_HASH; i++) | ||
411 | INIT_LIST_HEAD(&m2p_overrides[i]); | ||
412 | } | ||
413 | |||
414 | static unsigned long mfn_hash(unsigned long mfn) | ||
415 | { | ||
416 | return hash_long(mfn, M2P_OVERRIDE_HASH_SHIFT); | ||
417 | } | ||
418 | |||
419 | /* Add an MFN override for a particular page */ | ||
420 | int m2p_add_override(unsigned long mfn, struct page *page) | ||
421 | { | ||
422 | unsigned long flags; | ||
423 | unsigned long pfn; | ||
424 | unsigned long address; | ||
425 | unsigned level; | ||
426 | pte_t *ptep = NULL; | ||
427 | |||
428 | pfn = page_to_pfn(page); | ||
429 | if (!PageHighMem(page)) { | ||
430 | address = (unsigned long)__va(pfn << PAGE_SHIFT); | ||
431 | ptep = lookup_address(address, &level); | ||
432 | |||
433 | if (WARN(ptep == NULL || level != PG_LEVEL_4K, | ||
434 | "m2p_add_override: pfn %lx not mapped", pfn)) | ||
435 | return -EINVAL; | ||
436 | } | ||
437 | |||
438 | page->private = mfn; | ||
439 | page->index = pfn_to_mfn(pfn); | ||
440 | |||
441 | __set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)); | ||
442 | if (!PageHighMem(page)) | ||
443 | /* Just zap old mapping for now */ | ||
444 | pte_clear(&init_mm, address, ptep); | ||
445 | |||
446 | spin_lock_irqsave(&m2p_override_lock, flags); | ||
447 | list_add(&page->lru, &m2p_overrides[mfn_hash(mfn)]); | ||
448 | spin_unlock_irqrestore(&m2p_override_lock, flags); | ||
449 | |||
450 | return 0; | ||
451 | } | ||
452 | |||
453 | int m2p_remove_override(struct page *page) | ||
454 | { | ||
455 | unsigned long flags; | ||
456 | unsigned long mfn; | ||
457 | unsigned long pfn; | ||
458 | unsigned long address; | ||
459 | unsigned level; | ||
460 | pte_t *ptep = NULL; | ||
461 | |||
462 | pfn = page_to_pfn(page); | ||
463 | mfn = get_phys_to_machine(pfn); | ||
464 | if (mfn == INVALID_P2M_ENTRY || !(mfn & FOREIGN_FRAME_BIT)) | ||
465 | return -EINVAL; | ||
466 | |||
467 | if (!PageHighMem(page)) { | ||
468 | address = (unsigned long)__va(pfn << PAGE_SHIFT); | ||
469 | ptep = lookup_address(address, &level); | ||
470 | |||
471 | if (WARN(ptep == NULL || level != PG_LEVEL_4K, | ||
472 | "m2p_remove_override: pfn %lx not mapped", pfn)) | ||
473 | return -EINVAL; | ||
474 | } | ||
475 | |||
476 | spin_lock_irqsave(&m2p_override_lock, flags); | ||
477 | list_del(&page->lru); | ||
478 | spin_unlock_irqrestore(&m2p_override_lock, flags); | ||
479 | __set_phys_to_machine(pfn, page->index); | ||
480 | |||
481 | if (!PageHighMem(page)) | ||
482 | set_pte_at(&init_mm, address, ptep, | ||
483 | pfn_pte(pfn, PAGE_KERNEL)); | ||
484 | /* No tlb flush necessary because the caller already | ||
485 | * left the pte unmapped. */ | ||
486 | |||
487 | return 0; | ||
488 | } | ||
489 | |||
490 | struct page *m2p_find_override(unsigned long mfn) | ||
491 | { | ||
492 | unsigned long flags; | ||
493 | struct list_head *bucket = &m2p_overrides[mfn_hash(mfn)]; | ||
494 | struct page *p, *ret; | ||
495 | |||
496 | ret = NULL; | ||
497 | |||
498 | spin_lock_irqsave(&m2p_override_lock, flags); | ||
499 | |||
500 | list_for_each_entry(p, bucket, lru) { | ||
501 | if (p->private == mfn) { | ||
502 | ret = p; | ||
503 | break; | ||
504 | } | ||
505 | } | ||
506 | |||
507 | spin_unlock_irqrestore(&m2p_override_lock, flags); | ||
508 | |||
509 | return ret; | ||
510 | } | ||
511 | |||
512 | unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn) | ||
513 | { | ||
514 | struct page *p = m2p_find_override(mfn); | ||
515 | unsigned long ret = pfn; | ||
516 | |||
517 | if (p) | ||
518 | ret = page_to_pfn(p); | ||
519 | |||
520 | return ret; | ||
521 | } | ||
522 | EXPORT_SYMBOL_GPL(m2p_find_override_pfn); | ||
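The new arch/x86/xen/p2m.c above keeps the p2m table as a sparse three-level tree; on a 64-bit build each level is one 4 KiB page of 512 entries, so the tree spans 512 * 512 * 512 pfns. The standalone sketch below only reproduces the index arithmetic of p2m_top_index()/p2m_mid_index()/p2m_index() for an arbitrary pfn, as a worked example rather than kernel code.

#include <stdio.h>

#define PAGE_SIZE        4096UL
#define P2M_PER_PAGE     (PAGE_SIZE / sizeof(unsigned long))
#define P2M_MID_PER_PAGE (PAGE_SIZE / sizeof(unsigned long *))
#define P2M_TOP_PER_PAGE (PAGE_SIZE / sizeof(unsigned long **))

int main(void)
{
        unsigned long pfn = 0x123456;   /* arbitrary example pfn */

        /* Same splits as p2m_top_index(), p2m_mid_index() and p2m_index(). */
        unsigned long topidx = pfn / (P2M_MID_PER_PAGE * P2M_PER_PAGE);
        unsigned long mididx = (pfn / P2M_PER_PAGE) % P2M_MID_PER_PAGE;
        unsigned long idx    = pfn % P2M_PER_PAGE;

        printf("levels: %lu x %lu x %lu entries -> max %lu pfns\n",
               (unsigned long)P2M_TOP_PER_PAGE,
               (unsigned long)P2M_MID_PER_PAGE,
               (unsigned long)P2M_PER_PAGE,
               (unsigned long)(P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE));
        printf("pfn %#lx -> top %lu, mid %lu, leaf %lu\n",
               pfn, topidx, mididx, idx);
        return 0;
}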
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index b5a7f928234..a8a66a50d44 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c | |||
@@ -179,8 +179,13 @@ char * __init xen_memory_setup(void) | |||
179 | e820.nr_map = 0; | 179 | e820.nr_map = 0; |
180 | xen_extra_mem_start = mem_end; | 180 | xen_extra_mem_start = mem_end; |
181 | for (i = 0; i < memmap.nr_entries; i++) { | 181 | for (i = 0; i < memmap.nr_entries; i++) { |
182 | unsigned long long end = map[i].addr + map[i].size; | 182 | unsigned long long end; |
183 | 183 | ||
184 | /* Guard against non-page aligned E820 entries. */ | ||
185 | if (map[i].type == E820_RAM) | ||
186 | map[i].size -= (map[i].size + map[i].addr) % PAGE_SIZE; | ||
187 | |||
188 | end = map[i].addr + map[i].size; | ||
184 | if (map[i].type == E820_RAM && end > mem_end) { | 189 | if (map[i].type == E820_RAM && end > mem_end) { |
185 | /* RAM off the end - may be partially included */ | 190 | /* RAM off the end - may be partially included */ |
186 | u64 delta = min(map[i].size, end - mem_end); | 191 | u64 delta = min(map[i].size, end - mem_end); |
@@ -350,6 +355,7 @@ void __init xen_arch_setup(void) | |||
350 | boot_cpu_data.hlt_works_ok = 1; | 355 | boot_cpu_data.hlt_works_ok = 1; |
351 | #endif | 356 | #endif |
352 | pm_idle = default_idle; | 357 | pm_idle = default_idle; |
358 | boot_option_idle_override = IDLE_HALT; | ||
353 | 359 | ||
354 | fiddle_vdso(); | 360 | fiddle_vdso(); |
355 | } | 361 | } |
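The xen_memory_setup() hunk above guards against E820 RAM entries that do not end on a page boundary by shaving off the partial tail: size -= (size + addr) % PAGE_SIZE. A tiny worked example of that arithmetic follows; the entry's address and size are made-up values used only to show the trim.

#include <stdio.h>

#define PAGE_SIZE 4096ULL

int main(void)
{
        unsigned long long addr = 0x100000;             /* 1 MiB, page aligned */
        unsigned long long size = 0x7ee0000 + 0x800;    /* ends mid-page       */

        /* Same arithmetic as the hunk: drop the unaligned tail so that
         * addr + size lands exactly on a page boundary. */
        size -= (size + addr) % PAGE_SIZE;

        printf("trimmed end = %#llx (page aligned: %s)\n",
               addr + size, ((addr + size) % PAGE_SIZE) ? "no" : "yes");
        return 0;
}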