Diffstat (limited to 'arch/x86')
53 files changed, 1155 insertions, 684 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 7ab9db88ab6a..dfabfefc21c4 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -28,6 +28,7 @@ config X86 | |||
28 | select HAVE_IRQ_WORK | 28 | select HAVE_IRQ_WORK |
29 | select HAVE_IOREMAP_PROT | 29 | select HAVE_IOREMAP_PROT |
30 | select HAVE_KPROBES | 30 | select HAVE_KPROBES |
31 | select HAVE_MEMBLOCK | ||
31 | select ARCH_WANT_OPTIONAL_GPIOLIB | 32 | select ARCH_WANT_OPTIONAL_GPIOLIB |
32 | select ARCH_WANT_FRAME_POINTERS | 33 | select ARCH_WANT_FRAME_POINTERS |
33 | select HAVE_DMA_ATTRS | 34 | select HAVE_DMA_ATTRS |
@@ -201,9 +202,6 @@ config ARCH_SUPPORTS_OPTIMIZED_INLINING | |||
201 | config ARCH_SUPPORTS_DEBUG_PAGEALLOC | 202 | config ARCH_SUPPORTS_DEBUG_PAGEALLOC |
202 | def_bool y | 203 | def_bool y |
203 | 204 | ||
204 | config HAVE_EARLY_RES | ||
205 | def_bool y | ||
206 | |||
207 | config HAVE_INTEL_TXT | 205 | config HAVE_INTEL_TXT |
208 | def_bool y | 206 | def_bool y |
209 | depends on EXPERIMENTAL && DMAR && ACPI | 207 | depends on EXPERIMENTAL && DMAR && ACPI |
@@ -548,16 +546,7 @@ config PARAVIRT_DEBUG | |||
548 | a paravirt_op is missing when it is called. | 546 | a paravirt_op is missing when it is called. |
549 | 547 | ||
550 | config NO_BOOTMEM | 548 | config NO_BOOTMEM |
551 | default y | 549 | def_bool y |
552 | bool "Disable Bootmem code" | ||
553 | ---help--- | ||
554 | Use early_res directly instead of bootmem before slab is ready. | ||
555 | - allocator (buddy) [generic] | ||
556 | - early allocator (bootmem) [generic] | ||
557 | - very early allocator (reserve_early*()) [x86] | ||
558 | - very very early allocator (early brk model) [x86] | ||
559 | So reduce one layer between early allocator to final allocator | ||
560 | |||
561 | 550 | ||
562 | config MEMTEST | 551 | config MEMTEST |
563 | bool "Memtest" | 552 | bool "Memtest" |
diff --git a/arch/x86/include/asm/amd_iommu.h b/arch/x86/include/asm/amd_iommu.h
index f16a2caca1e0..a6863a2dec1f 100644
--- a/arch/x86/include/asm/amd_iommu.h
+++ b/arch/x86/include/asm/amd_iommu.h
@@ -24,11 +24,11 @@ | |||
24 | 24 | ||
25 | #ifdef CONFIG_AMD_IOMMU | 25 | #ifdef CONFIG_AMD_IOMMU |
26 | 26 | ||
27 | extern void amd_iommu_detect(void); | 27 | extern int amd_iommu_detect(void); |
28 | 28 | ||
29 | #else | 29 | #else |
30 | 30 | ||
31 | static inline void amd_iommu_detect(void) { } | 31 | static inline int amd_iommu_detect(void) { return -ENODEV; } |
32 | 32 | ||
33 | #endif | 33 | #endif |
34 | 34 | ||
diff --git a/arch/x86/include/asm/calgary.h b/arch/x86/include/asm/calgary.h
index 0918654305af..0d467b338835 100644
--- a/arch/x86/include/asm/calgary.h
+++ b/arch/x86/include/asm/calgary.h
@@ -62,9 +62,9 @@ struct cal_chipset_ops { | |||
62 | extern int use_calgary; | 62 | extern int use_calgary; |
63 | 63 | ||
64 | #ifdef CONFIG_CALGARY_IOMMU | 64 | #ifdef CONFIG_CALGARY_IOMMU |
65 | extern void detect_calgary(void); | 65 | extern int detect_calgary(void); |
66 | #else | 66 | #else |
67 | static inline void detect_calgary(void) { return; } | 67 | static inline int detect_calgary(void) { return -ENODEV; } |
68 | #endif | 68 | #endif |
69 | 69 | ||
70 | #endif /* _ASM_X86_CALGARY_H */ | 70 | #endif /* _ASM_X86_CALGARY_H */ |
diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h
index ec8a52d14ab1..5be1542fbfaf 100644
--- a/arch/x86/include/asm/e820.h
+++ b/arch/x86/include/asm/e820.h
@@ -112,23 +112,13 @@ static inline void early_memtest(unsigned long start, unsigned long end) | |||
112 | } | 112 | } |
113 | #endif | 113 | #endif |
114 | 114 | ||
115 | extern unsigned long end_user_pfn; | ||
116 | |||
117 | extern u64 find_e820_area(u64 start, u64 end, u64 size, u64 align); | ||
118 | extern u64 find_e820_area_size(u64 start, u64 *sizep, u64 align); | ||
119 | extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align); | ||
120 | #include <linux/early_res.h> | ||
121 | |||
122 | extern unsigned long e820_end_of_ram_pfn(void); | 115 | extern unsigned long e820_end_of_ram_pfn(void); |
123 | extern unsigned long e820_end_of_low_ram_pfn(void); | 116 | extern unsigned long e820_end_of_low_ram_pfn(void); |
124 | extern int e820_find_active_region(const struct e820entry *ei, | 117 | extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align); |
125 | unsigned long start_pfn, | 118 | |
126 | unsigned long last_pfn, | 119 | void memblock_x86_fill(void); |
127 | unsigned long *ei_startpfn, | 120 | void memblock_find_dma_reserve(void); |
128 | unsigned long *ei_endpfn); | 121 | |
129 | extern void e820_register_active_regions(int nid, unsigned long start_pfn, | ||
130 | unsigned long end_pfn); | ||
131 | extern u64 e820_hole_size(u64 start, u64 end); | ||
132 | extern void finish_e820_parsing(void); | 122 | extern void finish_e820_parsing(void); |
133 | extern void e820_reserve_resources(void); | 123 | extern void e820_reserve_resources(void); |
134 | extern void e820_reserve_resources_late(void); | 124 | extern void e820_reserve_resources_late(void); |
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 8406ed7f9926..8e4a16508d4e 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -90,7 +90,7 @@ extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size, | |||
90 | #endif /* CONFIG_X86_32 */ | 90 | #endif /* CONFIG_X86_32 */ |
91 | 91 | ||
92 | extern int add_efi_memmap; | 92 | extern int add_efi_memmap; |
93 | extern void efi_reserve_early(void); | 93 | extern void efi_memblock_x86_reserve_range(void); |
94 | extern void efi_call_phys_prelog(void); | 94 | extern void efi_call_phys_prelog(void); |
95 | extern void efi_call_phys_epilog(void); | 95 | extern void efi_call_phys_epilog(void); |
96 | 96 | ||
diff --git a/arch/x86/include/asm/gart.h b/arch/x86/include/asm/gart.h
index bf357f9b25f0..43085bfc99c3 100644
--- a/arch/x86/include/asm/gart.h
+++ b/arch/x86/include/asm/gart.h
@@ -37,7 +37,7 @@ extern int gart_iommu_aperture_disabled; | |||
37 | extern void early_gart_iommu_check(void); | 37 | extern void early_gart_iommu_check(void); |
38 | extern int gart_iommu_init(void); | 38 | extern int gart_iommu_init(void); |
39 | extern void __init gart_parse_options(char *); | 39 | extern void __init gart_parse_options(char *); |
40 | extern void gart_iommu_hole_init(void); | 40 | extern int gart_iommu_hole_init(void); |
41 | 41 | ||
42 | #else | 42 | #else |
43 | #define gart_iommu_aperture 0 | 43 | #define gart_iommu_aperture 0 |
@@ -50,8 +50,9 @@ static inline void early_gart_iommu_check(void) | |||
50 | static inline void gart_parse_options(char *options) | 50 | static inline void gart_parse_options(char *options) |
51 | { | 51 | { |
52 | } | 52 | } |
53 | static inline void gart_iommu_hole_init(void) | 53 | static inline int gart_iommu_hole_init(void) |
54 | { | 54 | { |
55 | return -ENODEV; | ||
55 | } | 56 | } |
56 | #endif | 57 | #endif |
57 | 58 | ||
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index 6a45ec41ec26..f0203f4791a8 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -349,6 +349,7 @@ extern void __iomem *early_memremap(resource_size_t phys_addr, | |||
349 | unsigned long size); | 349 | unsigned long size); |
350 | extern void early_iounmap(void __iomem *addr, unsigned long size); | 350 | extern void early_iounmap(void __iomem *addr, unsigned long size); |
351 | extern void fixup_early_ioremap(void); | 351 | extern void fixup_early_ioremap(void); |
352 | extern bool is_early_ioremap_ptep(pte_t *ptep); | ||
352 | 353 | ||
353 | #define IO_SPACE_LIMIT 0xffff | 354 | #define IO_SPACE_LIMIT 0xffff |
354 | 355 | ||
diff --git a/arch/x86/include/asm/iommu_table.h b/arch/x86/include/asm/iommu_table.h
new file mode 100644
index 000000000000..f229b13a5f30
--- /dev/null
+++ b/arch/x86/include/asm/iommu_table.h
@@ -0,0 +1,100 @@ | |||
1 | #ifndef _ASM_X86_IOMMU_TABLE_H | ||
2 | #define _ASM_X86_IOMMU_TABLE_H | ||
3 | |||
4 | #include <asm/swiotlb.h> | ||
5 | |||
6 | /* | ||
7 | * History lesson: | ||
8 | * The execution chain of IOMMUs in 2.6.36 looks like this: | ||
9 | * | ||
10 | * [xen-swiotlb] | ||
11 | * | | ||
12 | * +----[swiotlb *]--+ | ||
13 | * / | \ | ||
14 | * / | \ | ||
15 | * [GART] [Calgary] [Intel VT-d] | ||
16 | * / | ||
17 | * / | ||
18 | * [AMD-Vi] | ||
19 | * | ||
20 | * *: if SWIOTLB detected 'iommu=soft'/'swiotlb=force' it would skip | ||
21 | * over the rest of IOMMUs and unconditionally initialize the SWIOTLB. | ||
22 | * Also it would surreptitiously set swiotlb=1 if there were | ||
23 | * more than 4GB and if the user did not pass in 'iommu=off'. The swiotlb | ||
24 | * flag would be turned off by all IOMMUs except the Calgary one. | ||
25 | * | ||
26 | * The IOMMU_INIT* macros allow a similar tree (or more complex if desired) | ||
27 | * to be built by defining who we depend on. | ||
28 | * | ||
29 | * And all that needs to be done is to use one of the macros in the IOMMU | ||
30 | * driver and pci-dma.c will take care of the rest. | ||
31 | */ | ||
32 | |||
33 | struct iommu_table_entry { | ||
34 | initcall_t detect; | ||
35 | initcall_t depend; | ||
36 | void (*early_init)(void); /* No memory allocator available. */ | ||
37 | void (*late_init)(void); /* Yes, can allocate memory. */ | ||
38 | #define IOMMU_FINISH_IF_DETECTED (1<<0) | ||
39 | #define IOMMU_DETECTED (1<<1) | ||
40 | int flags; | ||
41 | }; | ||
42 | /* | ||
43 | * This macro fills out an entry in the .iommu_table that is equivalent | ||
44 | * to the fields that 'struct iommu_table_entry' has. The entries that | ||
45 | * are put in the .iommu_table section are not placed in any particular | ||
46 | * order, hence at boot time we have to re-sort them based on their | ||
47 | * dependencies. */ | ||
48 | |||
49 | |||
50 | #define __IOMMU_INIT(_detect, _depend, _early_init, _late_init, _finish)\ | ||
51 | static const struct iommu_table_entry \ | ||
52 | __iommu_entry_##_detect __used \ | ||
53 | __attribute__ ((unused, __section__(".iommu_table"), \ | ||
54 | aligned((sizeof(void *))))) \ | ||
55 | = {_detect, _depend, _early_init, _late_init, \ | ||
56 | _finish ? IOMMU_FINISH_IF_DETECTED : 0} | ||
57 | /* | ||
58 | * The simplest IOMMU definition. Provide the detection routine | ||
59 | * and it will be run after the SWIOTLB and the other IOMMUs | ||
60 | * that utilize this macro. If the IOMMU is detected (i.e., the | ||
61 | * detect routine returns a positive value), the other IOMMUs | ||
62 | * are also checked. You can use IOMMU_INIT_POST_FINISH if you prefer | ||
63 | * to stop detecting the other IOMMUs after yours has been detected. | ||
64 | */ | ||
65 | #define IOMMU_INIT_POST(_detect) \ | ||
66 | __IOMMU_INIT(_detect, pci_swiotlb_detect_4gb, 0, 0, 0) | ||
67 | |||
68 | #define IOMMU_INIT_POST_FINISH(_detect) \ | ||
69 | __IOMMU_INIT(_detect, pci_swiotlb_detect_4gb, 0, 0, 1) | ||
70 | |||
71 | /* | ||
72 | * A more sophisticated version of IOMMU_INIT. This variant requires: | ||
73 | * a). A detection routine function. | ||
74 | * b). The name of the detection routine we depend on to get called | ||
75 | * before us. | ||
76 | * c). The init routine which gets called if the detection routine | ||
77 | * returns a positive value from the pci_iommu_alloc. This means | ||
78 | * no presence of a memory allocator. | ||
79 | * d). Similar to the 'init', except that this gets called from pci_iommu_init | ||
80 | * where we do have a memory allocator. | ||
81 | * | ||
82 | * The standard vs the _FINISH differs in that the _FINISH variant will | ||
83 | * continue detecting other IOMMUs in the call list after the | ||
84 | * the detection routine returns a positive number. The _FINISH will | ||
85 | * stop the execution chain. Both will still call the 'init' and | ||
86 | * 'late_init' functions if they are set. | ||
87 | */ | ||
88 | #define IOMMU_INIT_FINISH(_detect, _depend, _init, _late_init) \ | ||
89 | __IOMMU_INIT(_detect, _depend, _init, _late_init, 1) | ||
90 | |||
91 | #define IOMMU_INIT(_detect, _depend, _init, _late_init) \ | ||
92 | __IOMMU_INIT(_detect, _depend, _init, _late_init, 0) | ||
93 | |||
94 | void sort_iommu_table(struct iommu_table_entry *start, | ||
95 | struct iommu_table_entry *finish); | ||
96 | |||
97 | void check_iommu_entries(struct iommu_table_entry *start, | ||
98 | struct iommu_table_entry *finish); | ||
99 | |||
100 | #endif /* _ASM_X86_IOMMU_TABLE_H */ | ||
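To make the macros above concrete: a driver registers its detection routine with one of the IOMMU_INIT* variants and pci-dma.c walks the sorted table. The sketch below is purely illustrative (the my_iommu_* names are invented; only the macros, pci_swiotlb_detect_4gb and the calling convention come from this patch) and mirrors the real IOMMU_INIT_FINISH(amd_iommu_detect, ...) registration added to amd_iommu_init.c further down in this diff.

#include <linux/init.h>
#include <linux/errno.h>
#include <asm/iommu_table.h>
#include <asm/swiotlb.h>

/* Hypothetical detection routine: return a positive value when the
 * hardware is found, a negative errno otherwise. */
static int __init my_iommu_detect(void)
{
	return -ENODEV;
}

/* Called from pci_iommu_alloc(), before a memory allocator is available. */
static void __init my_iommu_early_init(void) { }

/* Called from pci_iommu_init(), once memory can be allocated. */
static void __init my_iommu_late_init(void) { }

/* Run after the SWIOTLB 4GB check; keep probing other IOMMUs even if
 * this one is detected (IOMMU_INIT_FINISH would stop the chain instead). */
IOMMU_INIT(my_iommu_detect, pci_swiotlb_detect_4gb,
	   my_iommu_early_init, my_iommu_late_init);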
diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index 9e2b952f810a..5745ce8bf108 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -61,22 +61,22 @@ static inline void native_halt(void) | |||
61 | #else | 61 | #else |
62 | #ifndef __ASSEMBLY__ | 62 | #ifndef __ASSEMBLY__ |
63 | 63 | ||
64 | static inline unsigned long __raw_local_save_flags(void) | 64 | static inline unsigned long arch_local_save_flags(void) |
65 | { | 65 | { |
66 | return native_save_fl(); | 66 | return native_save_fl(); |
67 | } | 67 | } |
68 | 68 | ||
69 | static inline void raw_local_irq_restore(unsigned long flags) | 69 | static inline void arch_local_irq_restore(unsigned long flags) |
70 | { | 70 | { |
71 | native_restore_fl(flags); | 71 | native_restore_fl(flags); |
72 | } | 72 | } |
73 | 73 | ||
74 | static inline void raw_local_irq_disable(void) | 74 | static inline void arch_local_irq_disable(void) |
75 | { | 75 | { |
76 | native_irq_disable(); | 76 | native_irq_disable(); |
77 | } | 77 | } |
78 | 78 | ||
79 | static inline void raw_local_irq_enable(void) | 79 | static inline void arch_local_irq_enable(void) |
80 | { | 80 | { |
81 | native_irq_enable(); | 81 | native_irq_enable(); |
82 | } | 82 | } |
@@ -85,7 +85,7 @@ static inline void raw_local_irq_enable(void) | |||
85 | * Used in the idle loop; sti takes one instruction cycle | 85 | * Used in the idle loop; sti takes one instruction cycle |
86 | * to complete: | 86 | * to complete: |
87 | */ | 87 | */ |
88 | static inline void raw_safe_halt(void) | 88 | static inline void arch_safe_halt(void) |
89 | { | 89 | { |
90 | native_safe_halt(); | 90 | native_safe_halt(); |
91 | } | 91 | } |
@@ -102,12 +102,10 @@ static inline void halt(void) | |||
102 | /* | 102 | /* |
103 | * For spinlocks, etc: | 103 | * For spinlocks, etc: |
104 | */ | 104 | */ |
105 | static inline unsigned long __raw_local_irq_save(void) | 105 | static inline unsigned long arch_local_irq_save(void) |
106 | { | 106 | { |
107 | unsigned long flags = __raw_local_save_flags(); | 107 | unsigned long flags = arch_local_save_flags(); |
108 | 108 | arch_local_irq_disable(); | |
109 | raw_local_irq_disable(); | ||
110 | |||
111 | return flags; | 109 | return flags; |
112 | } | 110 | } |
113 | #else | 111 | #else |
@@ -153,22 +151,16 @@ static inline unsigned long __raw_local_irq_save(void) | |||
153 | #endif /* CONFIG_PARAVIRT */ | 151 | #endif /* CONFIG_PARAVIRT */ |
154 | 152 | ||
155 | #ifndef __ASSEMBLY__ | 153 | #ifndef __ASSEMBLY__ |
156 | #define raw_local_save_flags(flags) \ | 154 | static inline int arch_irqs_disabled_flags(unsigned long flags) |
157 | do { (flags) = __raw_local_save_flags(); } while (0) | ||
158 | |||
159 | #define raw_local_irq_save(flags) \ | ||
160 | do { (flags) = __raw_local_irq_save(); } while (0) | ||
161 | |||
162 | static inline int raw_irqs_disabled_flags(unsigned long flags) | ||
163 | { | 155 | { |
164 | return !(flags & X86_EFLAGS_IF); | 156 | return !(flags & X86_EFLAGS_IF); |
165 | } | 157 | } |
166 | 158 | ||
167 | static inline int raw_irqs_disabled(void) | 159 | static inline int arch_irqs_disabled(void) |
168 | { | 160 | { |
169 | unsigned long flags = __raw_local_save_flags(); | 161 | unsigned long flags = arch_local_save_flags(); |
170 | 162 | ||
171 | return raw_irqs_disabled_flags(flags); | 163 | return arch_irqs_disabled_flags(flags); |
172 | } | 164 | } |
173 | 165 | ||
174 | #else | 166 | #else |
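The change above is a mechanical rename: the __raw_local_*/raw_local_* helpers become arch_local_*, so the generic irqflags layer can build local_irq_save()/local_irq_restore() on top of them uniformly. A minimal sketch of the save/restore pattern these primitives implement (illustrative only; kernel code normally uses the local_irq_* wrappers rather than calling the arch_* functions directly):

static void example_critical_section(void)
{
	unsigned long flags;

	flags = arch_local_irq_save();		/* save EFLAGS.IF, then disable IRQs */

	/* ... work that must not be interrupted on this CPU ... */

	if (arch_irqs_disabled_flags(flags)) {
		/* interrupts were already disabled before the save */
	}

	arch_local_irq_restore(flags);		/* put EFLAGS.IF back as it was */
}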
diff --git a/arch/x86/include/asm/memblock.h b/arch/x86/include/asm/memblock.h
new file mode 100644
index 000000000000..19ae14ba6978
--- /dev/null
+++ b/arch/x86/include/asm/memblock.h
@@ -0,0 +1,23 @@ | |||
1 | #ifndef _X86_MEMBLOCK_H | ||
2 | #define _X86_MEMBLOCK_H | ||
3 | |||
4 | #define ARCH_DISCARD_MEMBLOCK | ||
5 | |||
6 | u64 memblock_x86_find_in_range_size(u64 start, u64 *sizep, u64 align); | ||
7 | void memblock_x86_to_bootmem(u64 start, u64 end); | ||
8 | |||
9 | void memblock_x86_reserve_range(u64 start, u64 end, char *name); | ||
10 | void memblock_x86_free_range(u64 start, u64 end); | ||
11 | struct range; | ||
12 | int __get_free_all_memory_range(struct range **range, int nodeid, | ||
13 | unsigned long start_pfn, unsigned long end_pfn); | ||
14 | int get_free_all_memory_range(struct range **rangep, int nodeid); | ||
15 | |||
16 | void memblock_x86_register_active_regions(int nid, unsigned long start_pfn, | ||
17 | unsigned long last_pfn); | ||
18 | u64 memblock_x86_hole_size(u64 start, u64 end); | ||
19 | u64 memblock_x86_find_in_range_node(int nid, u64 start, u64 end, u64 size, u64 align); | ||
20 | u64 memblock_x86_free_memory_in_range(u64 addr, u64 limit); | ||
21 | u64 memblock_x86_memory_in_range(u64 addr, u64 limit); | ||
22 | |||
23 | #endif | ||
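Most call sites converted in this series follow the same find-then-reserve pattern with the helpers declared above; check.c and e820.c below use it almost verbatim. A minimal, self-contained sketch (the 1MB start, 64K cap and the "EXAMPLE" name are arbitrary):

#include <linux/memblock.h>
#include <asm/memblock.h>
#include <asm/page.h>

static void __init example_reserve_scratch_area(void)
{
	u64 addr, size;

	/* find the first free range at or above 1MB; its size comes back in 'size' */
	addr = memblock_x86_find_in_range_size(1ULL << 20, &size, PAGE_SIZE);
	if (addr == MEMBLOCK_ERROR)
		return;				/* nothing suitable found */

	if (size > 64 * 1024)
		size = 64 * 1024;		/* only take what is needed */

	/* take it out of the free pool so later allocations skip it */
	memblock_x86_reserve_range(addr, addr + size, "EXAMPLE");
}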
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index edecb4ed2210..18e3b8a8709f 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -105,7 +105,7 @@ static inline void write_cr8(unsigned long x) | |||
105 | } | 105 | } |
106 | #endif | 106 | #endif |
107 | 107 | ||
108 | static inline void raw_safe_halt(void) | 108 | static inline void arch_safe_halt(void) |
109 | { | 109 | { |
110 | PVOP_VCALL0(pv_irq_ops.safe_halt); | 110 | PVOP_VCALL0(pv_irq_ops.safe_halt); |
111 | } | 111 | } |
@@ -824,32 +824,32 @@ static __always_inline void arch_spin_unlock(struct arch_spinlock *lock) | |||
824 | #define __PV_IS_CALLEE_SAVE(func) \ | 824 | #define __PV_IS_CALLEE_SAVE(func) \ |
825 | ((struct paravirt_callee_save) { func }) | 825 | ((struct paravirt_callee_save) { func }) |
826 | 826 | ||
827 | static inline unsigned long __raw_local_save_flags(void) | 827 | static inline unsigned long arch_local_save_flags(void) |
828 | { | 828 | { |
829 | return PVOP_CALLEE0(unsigned long, pv_irq_ops.save_fl); | 829 | return PVOP_CALLEE0(unsigned long, pv_irq_ops.save_fl); |
830 | } | 830 | } |
831 | 831 | ||
832 | static inline void raw_local_irq_restore(unsigned long f) | 832 | static inline void arch_local_irq_restore(unsigned long f) |
833 | { | 833 | { |
834 | PVOP_VCALLEE1(pv_irq_ops.restore_fl, f); | 834 | PVOP_VCALLEE1(pv_irq_ops.restore_fl, f); |
835 | } | 835 | } |
836 | 836 | ||
837 | static inline void raw_local_irq_disable(void) | 837 | static inline void arch_local_irq_disable(void) |
838 | { | 838 | { |
839 | PVOP_VCALLEE0(pv_irq_ops.irq_disable); | 839 | PVOP_VCALLEE0(pv_irq_ops.irq_disable); |
840 | } | 840 | } |
841 | 841 | ||
842 | static inline void raw_local_irq_enable(void) | 842 | static inline void arch_local_irq_enable(void) |
843 | { | 843 | { |
844 | PVOP_VCALLEE0(pv_irq_ops.irq_enable); | 844 | PVOP_VCALLEE0(pv_irq_ops.irq_enable); |
845 | } | 845 | } |
846 | 846 | ||
847 | static inline unsigned long __raw_local_irq_save(void) | 847 | static inline unsigned long arch_local_irq_save(void) |
848 | { | 848 | { |
849 | unsigned long f; | 849 | unsigned long f; |
850 | 850 | ||
851 | f = __raw_local_save_flags(); | 851 | f = arch_local_save_flags(); |
852 | raw_local_irq_disable(); | 852 | arch_local_irq_disable(); |
853 | return f; | 853 | return f; |
854 | } | 854 | } |
855 | 855 | ||
diff --git a/arch/x86/include/asm/swiotlb.h b/arch/x86/include/asm/swiotlb.h
index 8085277e1b8b..977f1761a25d 100644
--- a/arch/x86/include/asm/swiotlb.h
+++ b/arch/x86/include/asm/swiotlb.h
@@ -5,17 +5,26 @@ | |||
5 | 5 | ||
6 | #ifdef CONFIG_SWIOTLB | 6 | #ifdef CONFIG_SWIOTLB |
7 | extern int swiotlb; | 7 | extern int swiotlb; |
8 | extern int __init pci_swiotlb_detect(void); | 8 | extern int __init pci_swiotlb_detect_override(void); |
9 | extern int __init pci_swiotlb_detect_4gb(void); | ||
9 | extern void __init pci_swiotlb_init(void); | 10 | extern void __init pci_swiotlb_init(void); |
11 | extern void __init pci_swiotlb_late_init(void); | ||
10 | #else | 12 | #else |
11 | #define swiotlb 0 | 13 | #define swiotlb 0 |
12 | static inline int pci_swiotlb_detect(void) | 14 | static inline int pci_swiotlb_detect_override(void) |
15 | { | ||
16 | return 0; | ||
17 | } | ||
18 | static inline int pci_swiotlb_detect_4gb(void) | ||
13 | { | 19 | { |
14 | return 0; | 20 | return 0; |
15 | } | 21 | } |
16 | static inline void pci_swiotlb_init(void) | 22 | static inline void pci_swiotlb_init(void) |
17 | { | 23 | { |
18 | } | 24 | } |
25 | static inline void pci_swiotlb_late_init(void) | ||
26 | { | ||
27 | } | ||
19 | #endif | 28 | #endif |
20 | 29 | ||
21 | static inline void dma_mark_clean(void *addr, size_t size) {} | 30 | static inline void dma_mark_clean(void *addr, size_t size) {} |
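The single pci_swiotlb_detect() is split in two so the explicit user override (swiotlb=force / iommu=soft) and the >4GB fallback check can sit at different points in the detection chain. The actual registration lives in pci-swiotlb.c, which is not part of this excerpt; the following is only a plausible sketch of how the two routines could be wired into the IOMMU_INIT machinery introduced earlier in this diff:

#include <asm/iommu_table.h>
#include <asm/swiotlb.h>

/* Honour explicit user overrides first; if they hit, stop the chain. */
IOMMU_INIT_FINISH(pci_swiotlb_detect_override,
		  NULL,				/* no dependency in this sketch */
		  pci_swiotlb_init,
		  pci_swiotlb_late_init);

/* The >4GB check runs after the override check; later IOMMUs may still
 * clear the swiotlb flag, so the chain keeps going. */
IOMMU_INIT(pci_swiotlb_detect_4gb,
	   pci_swiotlb_detect_override,
	   pci_swiotlb_init,
	   pci_swiotlb_late_init);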
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 80a93dc99076..2c833d8c4141 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -45,6 +45,7 @@ obj-y += bootflag.o e820.o | |||
45 | obj-y += pci-dma.o quirks.o i8237.o topology.o kdebugfs.o | 45 | obj-y += pci-dma.o quirks.o i8237.o topology.o kdebugfs.o |
46 | obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o | 46 | obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o |
47 | obj-y += tsc.o io_delay.o rtc.o | 47 | obj-y += tsc.o io_delay.o rtc.o |
48 | obj-y += pci-iommu_table.o | ||
48 | 49 | ||
49 | obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o | 50 | obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o |
50 | obj-y += process.o | 51 | obj-y += process.o |
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index 33cec152070d..e1252074ea40 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -7,6 +7,7 @@ | |||
7 | 7 | ||
8 | #include <linux/acpi.h> | 8 | #include <linux/acpi.h> |
9 | #include <linux/bootmem.h> | 9 | #include <linux/bootmem.h> |
10 | #include <linux/memblock.h> | ||
10 | #include <linux/dmi.h> | 11 | #include <linux/dmi.h> |
11 | #include <linux/cpumask.h> | 12 | #include <linux/cpumask.h> |
12 | #include <asm/segment.h> | 13 | #include <asm/segment.h> |
@@ -125,7 +126,7 @@ void acpi_restore_state_mem(void) | |||
125 | */ | 126 | */ |
126 | void __init acpi_reserve_wakeup_memory(void) | 127 | void __init acpi_reserve_wakeup_memory(void) |
127 | { | 128 | { |
128 | unsigned long mem; | 129 | phys_addr_t mem; |
129 | 130 | ||
130 | if ((&wakeup_code_end - &wakeup_code_start) > WAKEUP_SIZE) { | 131 | if ((&wakeup_code_end - &wakeup_code_start) > WAKEUP_SIZE) { |
131 | printk(KERN_ERR | 132 | printk(KERN_ERR |
@@ -133,15 +134,15 @@ void __init acpi_reserve_wakeup_memory(void) | |||
133 | return; | 134 | return; |
134 | } | 135 | } |
135 | 136 | ||
136 | mem = find_e820_area(0, 1<<20, WAKEUP_SIZE, PAGE_SIZE); | 137 | mem = memblock_find_in_range(0, 1<<20, WAKEUP_SIZE, PAGE_SIZE); |
137 | 138 | ||
138 | if (mem == -1L) { | 139 | if (mem == MEMBLOCK_ERROR) { |
139 | printk(KERN_ERR "ACPI: Cannot allocate lowmem, S3 disabled.\n"); | 140 | printk(KERN_ERR "ACPI: Cannot allocate lowmem, S3 disabled.\n"); |
140 | return; | 141 | return; |
141 | } | 142 | } |
142 | acpi_realmode = (unsigned long) phys_to_virt(mem); | 143 | acpi_realmode = (unsigned long) phys_to_virt(mem); |
143 | acpi_wakeup_address = mem; | 144 | acpi_wakeup_address = mem; |
144 | reserve_early(mem, mem + WAKEUP_SIZE, "ACPI WAKEUP"); | 145 | memblock_x86_reserve_range(mem, mem + WAKEUP_SIZE, "ACPI WAKEUP"); |
145 | } | 146 | } |
146 | 147 | ||
147 | 148 | ||
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 3cb482e123de..6e11c8134158 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -31,7 +31,7 @@ | |||
31 | #include <asm/iommu.h> | 31 | #include <asm/iommu.h> |
32 | #include <asm/gart.h> | 32 | #include <asm/gart.h> |
33 | #include <asm/x86_init.h> | 33 | #include <asm/x86_init.h> |
34 | 34 | #include <asm/iommu_table.h> | |
35 | /* | 35 | /* |
36 | * definitions for the ACPI scanning code | 36 | * definitions for the ACPI scanning code |
37 | */ | 37 | */ |
@@ -1499,13 +1499,13 @@ static int __init early_amd_iommu_detect(struct acpi_table_header *table) | |||
1499 | return 0; | 1499 | return 0; |
1500 | } | 1500 | } |
1501 | 1501 | ||
1502 | void __init amd_iommu_detect(void) | 1502 | int __init amd_iommu_detect(void) |
1503 | { | 1503 | { |
1504 | if (no_iommu || (iommu_detected && !gart_iommu_aperture)) | 1504 | if (no_iommu || (iommu_detected && !gart_iommu_aperture)) |
1505 | return; | 1505 | return -ENODEV; |
1506 | 1506 | ||
1507 | if (amd_iommu_disabled) | 1507 | if (amd_iommu_disabled) |
1508 | return; | 1508 | return -ENODEV; |
1509 | 1509 | ||
1510 | if (acpi_table_parse("IVRS", early_amd_iommu_detect) == 0) { | 1510 | if (acpi_table_parse("IVRS", early_amd_iommu_detect) == 0) { |
1511 | iommu_detected = 1; | 1511 | iommu_detected = 1; |
@@ -1514,7 +1514,9 @@ void __init amd_iommu_detect(void) | |||
1514 | 1514 | ||
1515 | /* Make sure ACS will be enabled */ | 1515 | /* Make sure ACS will be enabled */ |
1516 | pci_request_acs(); | 1516 | pci_request_acs(); |
1517 | return 1; | ||
1517 | } | 1518 | } |
1519 | return -ENODEV; | ||
1518 | } | 1520 | } |
1519 | 1521 | ||
1520 | /**************************************************************************** | 1522 | /**************************************************************************** |
@@ -1545,3 +1547,8 @@ static int __init parse_amd_iommu_options(char *str) | |||
1545 | 1547 | ||
1546 | __setup("amd_iommu_dump", parse_amd_iommu_dump); | 1548 | __setup("amd_iommu_dump", parse_amd_iommu_dump); |
1547 | __setup("amd_iommu=", parse_amd_iommu_options); | 1549 | __setup("amd_iommu=", parse_amd_iommu_options); |
1550 | |||
1551 | IOMMU_INIT_FINISH(amd_iommu_detect, | ||
1552 | gart_iommu_hole_init, | ||
1553 | 0, | ||
1554 | 0); | ||
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index 377f5db3b8b4..b3a16e8f0703 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -371,7 +371,7 @@ void __init early_gart_iommu_check(void) | |||
371 | 371 | ||
372 | static int __initdata printed_gart_size_msg; | 372 | static int __initdata printed_gart_size_msg; |
373 | 373 | ||
374 | void __init gart_iommu_hole_init(void) | 374 | int __init gart_iommu_hole_init(void) |
375 | { | 375 | { |
376 | u32 agp_aper_base = 0, agp_aper_order = 0; | 376 | u32 agp_aper_base = 0, agp_aper_order = 0; |
377 | u32 aper_size, aper_alloc = 0, aper_order = 0, last_aper_order = 0; | 377 | u32 aper_size, aper_alloc = 0, aper_order = 0, last_aper_order = 0; |
@@ -381,7 +381,7 @@ void __init gart_iommu_hole_init(void) | |||
381 | 381 | ||
382 | if (gart_iommu_aperture_disabled || !fix_aperture || | 382 | if (gart_iommu_aperture_disabled || !fix_aperture || |
383 | !early_pci_allowed()) | 383 | !early_pci_allowed()) |
384 | return; | 384 | return -ENODEV; |
385 | 385 | ||
386 | printk(KERN_INFO "Checking aperture...\n"); | 386 | printk(KERN_INFO "Checking aperture...\n"); |
387 | 387 | ||
@@ -463,8 +463,9 @@ out: | |||
463 | unsigned long n = (32 * 1024 * 1024) << last_aper_order; | 463 | unsigned long n = (32 * 1024 * 1024) << last_aper_order; |
464 | 464 | ||
465 | insert_aperture_resource((u32)last_aper_base, n); | 465 | insert_aperture_resource((u32)last_aper_base, n); |
466 | return 1; | ||
466 | } | 467 | } |
467 | return; | 468 | return 0; |
468 | } | 469 | } |
469 | 470 | ||
470 | if (!fallback_aper_force) { | 471 | if (!fallback_aper_force) { |
@@ -500,7 +501,7 @@ out: | |||
500 | panic("Not enough memory for aperture"); | 501 | panic("Not enough memory for aperture"); |
501 | } | 502 | } |
502 | } else { | 503 | } else { |
503 | return; | 504 | return 0; |
504 | } | 505 | } |
505 | 506 | ||
506 | /* Fix up the north bridges */ | 507 | /* Fix up the north bridges */ |
@@ -526,4 +527,6 @@ out: | |||
526 | } | 527 | } |
527 | 528 | ||
528 | set_up_gart_resume(aper_order, aper_alloc); | 529 | set_up_gart_resume(aper_order, aper_alloc); |
530 | |||
531 | return 1; | ||
529 | } | 532 | } |
diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c
index 3e28401f161c..960f26ab5c9f 100644
--- a/arch/x86/kernel/apic/numaq_32.c
+++ b/arch/x86/kernel/apic/numaq_32.c
@@ -26,6 +26,7 @@ | |||
26 | #include <linux/nodemask.h> | 26 | #include <linux/nodemask.h> |
27 | #include <linux/topology.h> | 27 | #include <linux/topology.h> |
28 | #include <linux/bootmem.h> | 28 | #include <linux/bootmem.h> |
29 | #include <linux/memblock.h> | ||
29 | #include <linux/threads.h> | 30 | #include <linux/threads.h> |
30 | #include <linux/cpumask.h> | 31 | #include <linux/cpumask.h> |
31 | #include <linux/kernel.h> | 32 | #include <linux/kernel.h> |
@@ -88,7 +89,7 @@ static inline void numaq_register_node(int node, struct sys_cfg_data *scd) | |||
88 | node_end_pfn[node] = | 89 | node_end_pfn[node] = |
89 | MB_TO_PAGES(eq->hi_shrd_mem_start + eq->hi_shrd_mem_size); | 90 | MB_TO_PAGES(eq->hi_shrd_mem_start + eq->hi_shrd_mem_size); |
90 | 91 | ||
91 | e820_register_active_regions(node, node_start_pfn[node], | 92 | memblock_x86_register_active_regions(node, node_start_pfn[node], |
92 | node_end_pfn[node]); | 93 | node_end_pfn[node]); |
93 | 94 | ||
94 | memory_present(node, node_start_pfn[node], node_end_pfn[node]); | 95 | memory_present(node, node_start_pfn[node], node_end_pfn[node]); |
diff --git a/arch/x86/kernel/check.c b/arch/x86/kernel/check.c
index fc999e6fc46a..13a389179514 100644
--- a/arch/x86/kernel/check.c
+++ b/arch/x86/kernel/check.c
@@ -2,7 +2,8 @@ | |||
2 | #include <linux/sched.h> | 2 | #include <linux/sched.h> |
3 | #include <linux/kthread.h> | 3 | #include <linux/kthread.h> |
4 | #include <linux/workqueue.h> | 4 | #include <linux/workqueue.h> |
5 | #include <asm/e820.h> | 5 | #include <linux/memblock.h> |
6 | |||
6 | #include <asm/proto.h> | 7 | #include <asm/proto.h> |
7 | 8 | ||
8 | /* | 9 | /* |
@@ -18,10 +19,12 @@ static int __read_mostly memory_corruption_check = -1; | |||
18 | static unsigned __read_mostly corruption_check_size = 64*1024; | 19 | static unsigned __read_mostly corruption_check_size = 64*1024; |
19 | static unsigned __read_mostly corruption_check_period = 60; /* seconds */ | 20 | static unsigned __read_mostly corruption_check_period = 60; /* seconds */ |
20 | 21 | ||
21 | static struct e820entry scan_areas[MAX_SCAN_AREAS]; | 22 | static struct scan_area { |
23 | u64 addr; | ||
24 | u64 size; | ||
25 | } scan_areas[MAX_SCAN_AREAS]; | ||
22 | static int num_scan_areas; | 26 | static int num_scan_areas; |
23 | 27 | ||
24 | |||
25 | static __init int set_corruption_check(char *arg) | 28 | static __init int set_corruption_check(char *arg) |
26 | { | 29 | { |
27 | char *end; | 30 | char *end; |
@@ -81,9 +84,9 @@ void __init setup_bios_corruption_check(void) | |||
81 | 84 | ||
82 | while (addr < corruption_check_size && num_scan_areas < MAX_SCAN_AREAS) { | 85 | while (addr < corruption_check_size && num_scan_areas < MAX_SCAN_AREAS) { |
83 | u64 size; | 86 | u64 size; |
84 | addr = find_e820_area_size(addr, &size, PAGE_SIZE); | 87 | addr = memblock_x86_find_in_range_size(addr, &size, PAGE_SIZE); |
85 | 88 | ||
86 | if (!(addr + 1)) | 89 | if (addr == MEMBLOCK_ERROR) |
87 | break; | 90 | break; |
88 | 91 | ||
89 | if (addr >= corruption_check_size) | 92 | if (addr >= corruption_check_size) |
@@ -92,7 +95,7 @@ void __init setup_bios_corruption_check(void) | |||
92 | if ((addr + size) > corruption_check_size) | 95 | if ((addr + size) > corruption_check_size) |
93 | size = corruption_check_size - addr; | 96 | size = corruption_check_size - addr; |
94 | 97 | ||
95 | e820_update_range(addr, size, E820_RAM, E820_RESERVED); | 98 | memblock_x86_reserve_range(addr, addr + size, "SCAN RAM"); |
96 | scan_areas[num_scan_areas].addr = addr; | 99 | scan_areas[num_scan_areas].addr = addr; |
97 | scan_areas[num_scan_areas].size = size; | 100 | scan_areas[num_scan_areas].size = size; |
98 | num_scan_areas++; | 101 | num_scan_areas++; |
@@ -105,7 +108,6 @@ void __init setup_bios_corruption_check(void) | |||
105 | 108 | ||
106 | printk(KERN_INFO "Scanning %d areas for low memory corruption\n", | 109 | printk(KERN_INFO "Scanning %d areas for low memory corruption\n", |
107 | num_scan_areas); | 110 | num_scan_areas); |
108 | update_e820(); | ||
109 | } | 111 | } |
110 | 112 | ||
111 | 113 | ||
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index fe73c1844a9a..a333bf9189f6 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -238,6 +238,7 @@ struct x86_pmu { | |||
238 | * Intel DebugStore bits | 238 | * Intel DebugStore bits |
239 | */ | 239 | */ |
240 | int bts, pebs; | 240 | int bts, pebs; |
241 | int bts_active, pebs_active; | ||
241 | int pebs_record_size; | 242 | int pebs_record_size; |
242 | void (*drain_pebs)(struct pt_regs *regs); | 243 | void (*drain_pebs)(struct pt_regs *regs); |
243 | struct event_constraint *pebs_constraints; | 244 | struct event_constraint *pebs_constraints; |
@@ -381,7 +382,7 @@ static void release_pmc_hardware(void) {} | |||
381 | 382 | ||
382 | #endif | 383 | #endif |
383 | 384 | ||
384 | static int reserve_ds_buffers(void); | 385 | static void reserve_ds_buffers(void); |
385 | static void release_ds_buffers(void); | 386 | static void release_ds_buffers(void); |
386 | 387 | ||
387 | static void hw_perf_event_destroy(struct perf_event *event) | 388 | static void hw_perf_event_destroy(struct perf_event *event) |
@@ -478,7 +479,7 @@ static int x86_setup_perfctr(struct perf_event *event) | |||
478 | if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) && | 479 | if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) && |
479 | (hwc->sample_period == 1)) { | 480 | (hwc->sample_period == 1)) { |
480 | /* BTS is not supported by this architecture. */ | 481 | /* BTS is not supported by this architecture. */ |
481 | if (!x86_pmu.bts) | 482 | if (!x86_pmu.bts_active) |
482 | return -EOPNOTSUPP; | 483 | return -EOPNOTSUPP; |
483 | 484 | ||
484 | /* BTS is currently only allowed for user-mode. */ | 485 | /* BTS is currently only allowed for user-mode. */ |
@@ -497,12 +498,13 @@ static int x86_pmu_hw_config(struct perf_event *event) | |||
497 | int precise = 0; | 498 | int precise = 0; |
498 | 499 | ||
499 | /* Support for constant skid */ | 500 | /* Support for constant skid */ |
500 | if (x86_pmu.pebs) | 501 | if (x86_pmu.pebs_active) { |
501 | precise++; | 502 | precise++; |
502 | 503 | ||
503 | /* Support for IP fixup */ | 504 | /* Support for IP fixup */ |
504 | if (x86_pmu.lbr_nr) | 505 | if (x86_pmu.lbr_nr) |
505 | precise++; | 506 | precise++; |
507 | } | ||
506 | 508 | ||
507 | if (event->attr.precise_ip > precise) | 509 | if (event->attr.precise_ip > precise) |
508 | return -EOPNOTSUPP; | 510 | return -EOPNOTSUPP; |
@@ -544,11 +546,8 @@ static int __x86_pmu_event_init(struct perf_event *event) | |||
544 | if (atomic_read(&active_events) == 0) { | 546 | if (atomic_read(&active_events) == 0) { |
545 | if (!reserve_pmc_hardware()) | 547 | if (!reserve_pmc_hardware()) |
546 | err = -EBUSY; | 548 | err = -EBUSY; |
547 | else { | 549 | else |
548 | err = reserve_ds_buffers(); | 550 | reserve_ds_buffers(); |
549 | if (err) | ||
550 | release_pmc_hardware(); | ||
551 | } | ||
552 | } | 551 | } |
553 | if (!err) | 552 | if (!err) |
554 | atomic_inc(&active_events); | 553 | atomic_inc(&active_events); |
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 4977f9c400e5..b7dcd9f2b8a0 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -74,6 +74,107 @@ static void fini_debug_store_on_cpu(int cpu) | |||
74 | wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0); | 74 | wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0); |
75 | } | 75 | } |
76 | 76 | ||
77 | static int alloc_pebs_buffer(int cpu) | ||
78 | { | ||
79 | struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; | ||
80 | int node = cpu_to_node(cpu); | ||
81 | int max, thresh = 1; /* always use a single PEBS record */ | ||
82 | void *buffer; | ||
83 | |||
84 | if (!x86_pmu.pebs) | ||
85 | return 0; | ||
86 | |||
87 | buffer = kmalloc_node(PEBS_BUFFER_SIZE, GFP_KERNEL | __GFP_ZERO, node); | ||
88 | if (unlikely(!buffer)) | ||
89 | return -ENOMEM; | ||
90 | |||
91 | max = PEBS_BUFFER_SIZE / x86_pmu.pebs_record_size; | ||
92 | |||
93 | ds->pebs_buffer_base = (u64)(unsigned long)buffer; | ||
94 | ds->pebs_index = ds->pebs_buffer_base; | ||
95 | ds->pebs_absolute_maximum = ds->pebs_buffer_base + | ||
96 | max * x86_pmu.pebs_record_size; | ||
97 | |||
98 | ds->pebs_interrupt_threshold = ds->pebs_buffer_base + | ||
99 | thresh * x86_pmu.pebs_record_size; | ||
100 | |||
101 | return 0; | ||
102 | } | ||
103 | |||
104 | static void release_pebs_buffer(int cpu) | ||
105 | { | ||
106 | struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; | ||
107 | |||
108 | if (!ds || !x86_pmu.pebs) | ||
109 | return; | ||
110 | |||
111 | kfree((void *)(unsigned long)ds->pebs_buffer_base); | ||
112 | ds->pebs_buffer_base = 0; | ||
113 | } | ||
114 | |||
115 | static int alloc_bts_buffer(int cpu) | ||
116 | { | ||
117 | struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; | ||
118 | int node = cpu_to_node(cpu); | ||
119 | int max, thresh; | ||
120 | void *buffer; | ||
121 | |||
122 | if (!x86_pmu.bts) | ||
123 | return 0; | ||
124 | |||
125 | buffer = kmalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_ZERO, node); | ||
126 | if (unlikely(!buffer)) | ||
127 | return -ENOMEM; | ||
128 | |||
129 | max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE; | ||
130 | thresh = max / 16; | ||
131 | |||
132 | ds->bts_buffer_base = (u64)(unsigned long)buffer; | ||
133 | ds->bts_index = ds->bts_buffer_base; | ||
134 | ds->bts_absolute_maximum = ds->bts_buffer_base + | ||
135 | max * BTS_RECORD_SIZE; | ||
136 | ds->bts_interrupt_threshold = ds->bts_absolute_maximum - | ||
137 | thresh * BTS_RECORD_SIZE; | ||
138 | |||
139 | return 0; | ||
140 | } | ||
141 | |||
142 | static void release_bts_buffer(int cpu) | ||
143 | { | ||
144 | struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; | ||
145 | |||
146 | if (!ds || !x86_pmu.bts) | ||
147 | return; | ||
148 | |||
149 | kfree((void *)(unsigned long)ds->bts_buffer_base); | ||
150 | ds->bts_buffer_base = 0; | ||
151 | } | ||
152 | |||
153 | static int alloc_ds_buffer(int cpu) | ||
154 | { | ||
155 | int node = cpu_to_node(cpu); | ||
156 | struct debug_store *ds; | ||
157 | |||
158 | ds = kmalloc_node(sizeof(*ds), GFP_KERNEL | __GFP_ZERO, node); | ||
159 | if (unlikely(!ds)) | ||
160 | return -ENOMEM; | ||
161 | |||
162 | per_cpu(cpu_hw_events, cpu).ds = ds; | ||
163 | |||
164 | return 0; | ||
165 | } | ||
166 | |||
167 | static void release_ds_buffer(int cpu) | ||
168 | { | ||
169 | struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; | ||
170 | |||
171 | if (!ds) | ||
172 | return; | ||
173 | |||
174 | per_cpu(cpu_hw_events, cpu).ds = NULL; | ||
175 | kfree(ds); | ||
176 | } | ||
177 | |||
77 | static void release_ds_buffers(void) | 178 | static void release_ds_buffers(void) |
78 | { | 179 | { |
79 | int cpu; | 180 | int cpu; |
@@ -82,93 +183,77 @@ static void release_ds_buffers(void) | |||
82 | return; | 183 | return; |
83 | 184 | ||
84 | get_online_cpus(); | 185 | get_online_cpus(); |
85 | |||
86 | for_each_online_cpu(cpu) | 186 | for_each_online_cpu(cpu) |
87 | fini_debug_store_on_cpu(cpu); | 187 | fini_debug_store_on_cpu(cpu); |
88 | 188 | ||
89 | for_each_possible_cpu(cpu) { | 189 | for_each_possible_cpu(cpu) { |
90 | struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; | 190 | release_pebs_buffer(cpu); |
91 | 191 | release_bts_buffer(cpu); | |
92 | if (!ds) | 192 | release_ds_buffer(cpu); |
93 | continue; | ||
94 | |||
95 | per_cpu(cpu_hw_events, cpu).ds = NULL; | ||
96 | |||
97 | kfree((void *)(unsigned long)ds->pebs_buffer_base); | ||
98 | kfree((void *)(unsigned long)ds->bts_buffer_base); | ||
99 | kfree(ds); | ||
100 | } | 193 | } |
101 | |||
102 | put_online_cpus(); | 194 | put_online_cpus(); |
103 | } | 195 | } |
104 | 196 | ||
105 | static int reserve_ds_buffers(void) | 197 | static void reserve_ds_buffers(void) |
106 | { | 198 | { |
107 | int cpu, err = 0; | 199 | int bts_err = 0, pebs_err = 0; |
200 | int cpu; | ||
201 | |||
202 | x86_pmu.bts_active = 0; | ||
203 | x86_pmu.pebs_active = 0; | ||
108 | 204 | ||
109 | if (!x86_pmu.bts && !x86_pmu.pebs) | 205 | if (!x86_pmu.bts && !x86_pmu.pebs) |
110 | return 0; | 206 | return; |
207 | |||
208 | if (!x86_pmu.bts) | ||
209 | bts_err = 1; | ||
210 | |||
211 | if (!x86_pmu.pebs) | ||
212 | pebs_err = 1; | ||
111 | 213 | ||
112 | get_online_cpus(); | 214 | get_online_cpus(); |
113 | 215 | ||
114 | for_each_possible_cpu(cpu) { | 216 | for_each_possible_cpu(cpu) { |
115 | struct debug_store *ds; | 217 | if (alloc_ds_buffer(cpu)) { |
116 | void *buffer; | 218 | bts_err = 1; |
117 | int max, thresh; | 219 | pebs_err = 1; |
220 | } | ||
221 | |||
222 | if (!bts_err && alloc_bts_buffer(cpu)) | ||
223 | bts_err = 1; | ||
118 | 224 | ||
119 | err = -ENOMEM; | 225 | if (!pebs_err && alloc_pebs_buffer(cpu)) |
120 | ds = kzalloc(sizeof(*ds), GFP_KERNEL); | 226 | pebs_err = 1; |
121 | if (unlikely(!ds)) | 227 | |
228 | if (bts_err && pebs_err) | ||
122 | break; | 229 | break; |
123 | per_cpu(cpu_hw_events, cpu).ds = ds; | 230 | } |
124 | |||
125 | if (x86_pmu.bts) { | ||
126 | buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL); | ||
127 | if (unlikely(!buffer)) | ||
128 | break; | ||
129 | |||
130 | max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE; | ||
131 | thresh = max / 16; | ||
132 | |||
133 | ds->bts_buffer_base = (u64)(unsigned long)buffer; | ||
134 | ds->bts_index = ds->bts_buffer_base; | ||
135 | ds->bts_absolute_maximum = ds->bts_buffer_base + | ||
136 | max * BTS_RECORD_SIZE; | ||
137 | ds->bts_interrupt_threshold = ds->bts_absolute_maximum - | ||
138 | thresh * BTS_RECORD_SIZE; | ||
139 | } | ||
140 | 231 | ||
141 | if (x86_pmu.pebs) { | 232 | if (bts_err) { |
142 | buffer = kzalloc(PEBS_BUFFER_SIZE, GFP_KERNEL); | 233 | for_each_possible_cpu(cpu) |
143 | if (unlikely(!buffer)) | 234 | release_bts_buffer(cpu); |
144 | break; | 235 | } |
145 | |||
146 | max = PEBS_BUFFER_SIZE / x86_pmu.pebs_record_size; | ||
147 | |||
148 | ds->pebs_buffer_base = (u64)(unsigned long)buffer; | ||
149 | ds->pebs_index = ds->pebs_buffer_base; | ||
150 | ds->pebs_absolute_maximum = ds->pebs_buffer_base + | ||
151 | max * x86_pmu.pebs_record_size; | ||
152 | /* | ||
153 | * Always use single record PEBS | ||
154 | */ | ||
155 | ds->pebs_interrupt_threshold = ds->pebs_buffer_base + | ||
156 | x86_pmu.pebs_record_size; | ||
157 | } | ||
158 | 236 | ||
159 | err = 0; | 237 | if (pebs_err) { |
238 | for_each_possible_cpu(cpu) | ||
239 | release_pebs_buffer(cpu); | ||
160 | } | 240 | } |
161 | 241 | ||
162 | if (err) | 242 | if (bts_err && pebs_err) { |
163 | release_ds_buffers(); | 243 | for_each_possible_cpu(cpu) |
164 | else { | 244 | release_ds_buffer(cpu); |
245 | } else { | ||
246 | if (x86_pmu.bts && !bts_err) | ||
247 | x86_pmu.bts_active = 1; | ||
248 | |||
249 | if (x86_pmu.pebs && !pebs_err) | ||
250 | x86_pmu.pebs_active = 1; | ||
251 | |||
165 | for_each_online_cpu(cpu) | 252 | for_each_online_cpu(cpu) |
166 | init_debug_store_on_cpu(cpu); | 253 | init_debug_store_on_cpu(cpu); |
167 | } | 254 | } |
168 | 255 | ||
169 | put_online_cpus(); | 256 | put_online_cpus(); |
170 | |||
171 | return err; | ||
172 | } | 257 | } |
173 | 258 | ||
174 | /* | 259 | /* |
@@ -233,7 +318,7 @@ static int intel_pmu_drain_bts_buffer(void) | |||
233 | if (!event) | 318 | if (!event) |
234 | return 0; | 319 | return 0; |
235 | 320 | ||
236 | if (!ds) | 321 | if (!x86_pmu.bts_active) |
237 | return 0; | 322 | return 0; |
238 | 323 | ||
239 | at = (struct bts_record *)(unsigned long)ds->bts_buffer_base; | 324 | at = (struct bts_record *)(unsigned long)ds->bts_buffer_base; |
@@ -503,7 +588,7 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs) | |||
503 | struct pebs_record_core *at, *top; | 588 | struct pebs_record_core *at, *top; |
504 | int n; | 589 | int n; |
505 | 590 | ||
506 | if (!ds || !x86_pmu.pebs) | 591 | if (!x86_pmu.pebs_active) |
507 | return; | 592 | return; |
508 | 593 | ||
509 | at = (struct pebs_record_core *)(unsigned long)ds->pebs_buffer_base; | 594 | at = (struct pebs_record_core *)(unsigned long)ds->pebs_buffer_base; |
@@ -545,7 +630,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) | |||
545 | u64 status = 0; | 630 | u64 status = 0; |
546 | int bit, n; | 631 | int bit, n; |
547 | 632 | ||
548 | if (!ds || !x86_pmu.pebs) | 633 | if (!x86_pmu.pebs_active) |
549 | return; | 634 | return; |
550 | 635 | ||
551 | at = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base; | 636 | at = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base; |
@@ -630,9 +715,8 @@ static void intel_ds_init(void) | |||
630 | 715 | ||
631 | #else /* CONFIG_CPU_SUP_INTEL */ | 716 | #else /* CONFIG_CPU_SUP_INTEL */ |
632 | 717 | ||
633 | static int reserve_ds_buffers(void) | 718 | static void reserve_ds_buffers(void) |
634 | { | 719 | { |
635 | return 0; | ||
636 | } | 720 | } |
637 | 721 | ||
638 | static void release_ds_buffers(void) | 722 | static void release_ds_buffers(void) |
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 0d6fc71bedb1..0c2b7ef7a34d 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -15,6 +15,7 @@ | |||
15 | #include <linux/pfn.h> | 15 | #include <linux/pfn.h> |
16 | #include <linux/suspend.h> | 16 | #include <linux/suspend.h> |
17 | #include <linux/firmware-map.h> | 17 | #include <linux/firmware-map.h> |
18 | #include <linux/memblock.h> | ||
18 | 19 | ||
19 | #include <asm/e820.h> | 20 | #include <asm/e820.h> |
20 | #include <asm/proto.h> | 21 | #include <asm/proto.h> |
@@ -738,73 +739,7 @@ core_initcall(e820_mark_nvs_memory); | |||
738 | #endif | 739 | #endif |
739 | 740 | ||
740 | /* | 741 | /* |
741 | * Find a free area with specified alignment in a specific range. | 742 | * Pre-allocate 4K and reserve it in memblock and e820_saved |
742 | */ | ||
743 | u64 __init find_e820_area(u64 start, u64 end, u64 size, u64 align) | ||
744 | { | ||
745 | int i; | ||
746 | |||
747 | for (i = 0; i < e820.nr_map; i++) { | ||
748 | struct e820entry *ei = &e820.map[i]; | ||
749 | u64 addr; | ||
750 | u64 ei_start, ei_last; | ||
751 | |||
752 | if (ei->type != E820_RAM) | ||
753 | continue; | ||
754 | |||
755 | ei_last = ei->addr + ei->size; | ||
756 | ei_start = ei->addr; | ||
757 | addr = find_early_area(ei_start, ei_last, start, end, | ||
758 | size, align); | ||
759 | |||
760 | if (addr != -1ULL) | ||
761 | return addr; | ||
762 | } | ||
763 | return -1ULL; | ||
764 | } | ||
765 | |||
766 | u64 __init find_fw_memmap_area(u64 start, u64 end, u64 size, u64 align) | ||
767 | { | ||
768 | return find_e820_area(start, end, size, align); | ||
769 | } | ||
770 | |||
771 | u64 __init get_max_mapped(void) | ||
772 | { | ||
773 | u64 end = max_pfn_mapped; | ||
774 | |||
775 | end <<= PAGE_SHIFT; | ||
776 | |||
777 | return end; | ||
778 | } | ||
779 | /* | ||
780 | * Find next free range after *start | ||
781 | */ | ||
782 | u64 __init find_e820_area_size(u64 start, u64 *sizep, u64 align) | ||
783 | { | ||
784 | int i; | ||
785 | |||
786 | for (i = 0; i < e820.nr_map; i++) { | ||
787 | struct e820entry *ei = &e820.map[i]; | ||
788 | u64 addr; | ||
789 | u64 ei_start, ei_last; | ||
790 | |||
791 | if (ei->type != E820_RAM) | ||
792 | continue; | ||
793 | |||
794 | ei_last = ei->addr + ei->size; | ||
795 | ei_start = ei->addr; | ||
796 | addr = find_early_area_size(ei_start, ei_last, start, | ||
797 | sizep, align); | ||
798 | |||
799 | if (addr != -1ULL) | ||
800 | return addr; | ||
801 | } | ||
802 | |||
803 | return -1ULL; | ||
804 | } | ||
805 | |||
806 | /* | ||
807 | * pre allocated 4k and reserved it in e820 | ||
808 | */ | 743 | */ |
809 | u64 __init early_reserve_e820(u64 startt, u64 sizet, u64 align) | 744 | u64 __init early_reserve_e820(u64 startt, u64 sizet, u64 align) |
810 | { | 745 | { |
@@ -813,8 +748,8 @@ u64 __init early_reserve_e820(u64 startt, u64 sizet, u64 align) | |||
813 | u64 start; | 748 | u64 start; |
814 | 749 | ||
815 | for (start = startt; ; start += size) { | 750 | for (start = startt; ; start += size) { |
816 | start = find_e820_area_size(start, &size, align); | 751 | start = memblock_x86_find_in_range_size(start, &size, align); |
817 | if (!(start + 1)) | 752 | if (start == MEMBLOCK_ERROR) |
818 | return 0; | 753 | return 0; |
819 | if (size >= sizet) | 754 | if (size >= sizet) |
820 | break; | 755 | break; |
@@ -830,10 +765,9 @@ u64 __init early_reserve_e820(u64 startt, u64 sizet, u64 align) | |||
830 | addr = round_down(start + size - sizet, align); | 765 | addr = round_down(start + size - sizet, align); |
831 | if (addr < start) | 766 | if (addr < start) |
832 | return 0; | 767 | return 0; |
833 | e820_update_range(addr, sizet, E820_RAM, E820_RESERVED); | 768 | memblock_x86_reserve_range(addr, addr + sizet, "new next"); |
834 | e820_update_range_saved(addr, sizet, E820_RAM, E820_RESERVED); | 769 | e820_update_range_saved(addr, sizet, E820_RAM, E820_RESERVED); |
835 | printk(KERN_INFO "update e820 for early_reserve_e820\n"); | 770 | printk(KERN_INFO "update e820_saved for early_reserve_e820\n"); |
836 | update_e820(); | ||
837 | update_e820_saved(); | 771 | update_e820_saved(); |
838 | 772 | ||
839 | return addr; | 773 | return addr; |
@@ -895,74 +829,6 @@ unsigned long __init e820_end_of_low_ram_pfn(void) | |||
895 | { | 829 | { |
896 | return e820_end_pfn(1UL<<(32 - PAGE_SHIFT), E820_RAM); | 830 | return e820_end_pfn(1UL<<(32 - PAGE_SHIFT), E820_RAM); |
897 | } | 831 | } |
898 | /* | ||
899 | * Finds an active region in the address range from start_pfn to last_pfn and | ||
900 | * returns its range in ei_startpfn and ei_endpfn for the e820 entry. | ||
901 | */ | ||
902 | int __init e820_find_active_region(const struct e820entry *ei, | ||
903 | unsigned long start_pfn, | ||
904 | unsigned long last_pfn, | ||
905 | unsigned long *ei_startpfn, | ||
906 | unsigned long *ei_endpfn) | ||
907 | { | ||
908 | u64 align = PAGE_SIZE; | ||
909 | |||
910 | *ei_startpfn = round_up(ei->addr, align) >> PAGE_SHIFT; | ||
911 | *ei_endpfn = round_down(ei->addr + ei->size, align) >> PAGE_SHIFT; | ||
912 | |||
913 | /* Skip map entries smaller than a page */ | ||
914 | if (*ei_startpfn >= *ei_endpfn) | ||
915 | return 0; | ||
916 | |||
917 | /* Skip if map is outside the node */ | ||
918 | if (ei->type != E820_RAM || *ei_endpfn <= start_pfn || | ||
919 | *ei_startpfn >= last_pfn) | ||
920 | return 0; | ||
921 | |||
922 | /* Check for overlaps */ | ||
923 | if (*ei_startpfn < start_pfn) | ||
924 | *ei_startpfn = start_pfn; | ||
925 | if (*ei_endpfn > last_pfn) | ||
926 | *ei_endpfn = last_pfn; | ||
927 | |||
928 | return 1; | ||
929 | } | ||
930 | |||
931 | /* Walk the e820 map and register active regions within a node */ | ||
932 | void __init e820_register_active_regions(int nid, unsigned long start_pfn, | ||
933 | unsigned long last_pfn) | ||
934 | { | ||
935 | unsigned long ei_startpfn; | ||
936 | unsigned long ei_endpfn; | ||
937 | int i; | ||
938 | |||
939 | for (i = 0; i < e820.nr_map; i++) | ||
940 | if (e820_find_active_region(&e820.map[i], | ||
941 | start_pfn, last_pfn, | ||
942 | &ei_startpfn, &ei_endpfn)) | ||
943 | add_active_range(nid, ei_startpfn, ei_endpfn); | ||
944 | } | ||
945 | |||
946 | /* | ||
947 | * Find the hole size (in bytes) in the memory range. | ||
948 | * @start: starting address of the memory range to scan | ||
949 | * @end: ending address of the memory range to scan | ||
950 | */ | ||
951 | u64 __init e820_hole_size(u64 start, u64 end) | ||
952 | { | ||
953 | unsigned long start_pfn = start >> PAGE_SHIFT; | ||
954 | unsigned long last_pfn = end >> PAGE_SHIFT; | ||
955 | unsigned long ei_startpfn, ei_endpfn, ram = 0; | ||
956 | int i; | ||
957 | |||
958 | for (i = 0; i < e820.nr_map; i++) { | ||
959 | if (e820_find_active_region(&e820.map[i], | ||
960 | start_pfn, last_pfn, | ||
961 | &ei_startpfn, &ei_endpfn)) | ||
962 | ram += ei_endpfn - ei_startpfn; | ||
963 | } | ||
964 | return end - start - ((u64)ram << PAGE_SHIFT); | ||
965 | } | ||
966 | 832 | ||
967 | static void early_panic(char *msg) | 833 | static void early_panic(char *msg) |
968 | { | 834 | { |
@@ -1210,3 +1076,48 @@ void __init setup_memory_map(void) | |||
1210 | printk(KERN_INFO "BIOS-provided physical RAM map:\n"); | 1076 | printk(KERN_INFO "BIOS-provided physical RAM map:\n"); |
1211 | e820_print_map(who); | 1077 | e820_print_map(who); |
1212 | } | 1078 | } |
1079 | |||
1080 | void __init memblock_x86_fill(void) | ||
1081 | { | ||
1082 | int i; | ||
1083 | u64 end; | ||
1084 | |||
1085 | /* | ||
1086 | * EFI may have more than 128 entries. | ||
1087 | * We are safe to enable resizing, because memblock_x86_fill() | ||
1088 | * is called rather late for x86. | ||
1089 | */ | ||
1090 | memblock_can_resize = 1; | ||
1091 | |||
1092 | for (i = 0; i < e820.nr_map; i++) { | ||
1093 | struct e820entry *ei = &e820.map[i]; | ||
1094 | |||
1095 | end = ei->addr + ei->size; | ||
1096 | if (end != (resource_size_t)end) | ||
1097 | continue; | ||
1098 | |||
1099 | if (ei->type != E820_RAM && ei->type != E820_RESERVED_KERN) | ||
1100 | continue; | ||
1101 | |||
1102 | memblock_add(ei->addr, ei->size); | ||
1103 | } | ||
1104 | |||
1105 | memblock_analyze(); | ||
1106 | memblock_dump_all(); | ||
1107 | } | ||
1108 | |||
1109 | void __init memblock_find_dma_reserve(void) | ||
1110 | { | ||
1111 | #ifdef CONFIG_X86_64 | ||
1112 | u64 free_size_pfn; | ||
1113 | u64 mem_size_pfn; | ||
1114 | /* | ||
1115 | * We need to find out how much memory below MAX_DMA_PFN is used: | ||
1116 | * use memblock to get the free size in [0, MAX_DMA_PFN] first, and | ||
1117 | * assume boot_mem will not take memory below MAX_DMA_PFN. | ||
1118 | */ | ||
1119 | mem_size_pfn = memblock_x86_memory_in_range(0, MAX_DMA_PFN << PAGE_SHIFT) >> PAGE_SHIFT; | ||
1120 | free_size_pfn = memblock_x86_free_memory_in_range(0, MAX_DMA_PFN << PAGE_SHIFT) >> PAGE_SHIFT; | ||
1121 | set_dma_reserve(mem_size_pfn - free_size_pfn); | ||
1122 | #endif | ||
1123 | } | ||
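Taken together with the head32.c/head64.c hunks later in this diff, the intended boot-time ordering of the new calls is roughly the sketch below. This is a paraphrase, not a copy of setup_arch() (the real call sites are in arch/x86/kernel/setup.c, outside this excerpt), and the reservation shown is just the one visible in head32.c:

#include <linux/memblock.h>
#include <asm/memblock.h>
#include <asm/e820.h>
#include <asm/sections.h>
#include <asm/page.h>

static void __init example_memblock_boot_order(void)
{
	/* 1. start with an empty memblock, done very early in head32.c/head64.c */
	memblock_init();

	/* 2. targeted reservations while only the raw e820 data exists */
	memblock_x86_reserve_range(__pa_symbol(&_text),
				   __pa_symbol(&__bss_stop), "TEXT DATA BSS");

	/* 3. once the e820 map is final, add every usable range to memblock */
	memblock_x86_fill();

	/* 4. account for memory already taken below MAX_DMA_PFN */
	memblock_find_dma_reserve();
}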
diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c
index c2fa9b8b497e..0fe27d7c6258 100644
--- a/arch/x86/kernel/efi.c
+++ b/arch/x86/kernel/efi.c
@@ -30,6 +30,7 @@ | |||
30 | #include <linux/init.h> | 30 | #include <linux/init.h> |
31 | #include <linux/efi.h> | 31 | #include <linux/efi.h> |
32 | #include <linux/bootmem.h> | 32 | #include <linux/bootmem.h> |
33 | #include <linux/memblock.h> | ||
33 | #include <linux/spinlock.h> | 34 | #include <linux/spinlock.h> |
34 | #include <linux/uaccess.h> | 35 | #include <linux/uaccess.h> |
35 | #include <linux/time.h> | 36 | #include <linux/time.h> |
@@ -275,7 +276,7 @@ static void __init do_add_efi_memmap(void) | |||
275 | sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); | 276 | sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); |
276 | } | 277 | } |
277 | 278 | ||
278 | void __init efi_reserve_early(void) | 279 | void __init efi_memblock_x86_reserve_range(void) |
279 | { | 280 | { |
280 | unsigned long pmap; | 281 | unsigned long pmap; |
281 | 282 | ||
@@ -290,7 +291,7 @@ void __init efi_reserve_early(void) | |||
290 | boot_params.efi_info.efi_memdesc_size; | 291 | boot_params.efi_info.efi_memdesc_size; |
291 | memmap.desc_version = boot_params.efi_info.efi_memdesc_version; | 292 | memmap.desc_version = boot_params.efi_info.efi_memdesc_version; |
292 | memmap.desc_size = boot_params.efi_info.efi_memdesc_size; | 293 | memmap.desc_size = boot_params.efi_info.efi_memdesc_size; |
293 | reserve_early(pmap, pmap + memmap.nr_map * memmap.desc_size, | 294 | memblock_x86_reserve_range(pmap, pmap + memmap.nr_map * memmap.desc_size, |
294 | "EFI memmap"); | 295 | "EFI memmap"); |
295 | } | 296 | } |
296 | 297 | ||
diff --git a/arch/x86/kernel/head.c b/arch/x86/kernel/head.c
index 3e66bd364a9d..af0699ba48cf 100644
--- a/arch/x86/kernel/head.c
+++ b/arch/x86/kernel/head.c
@@ -1,5 +1,6 @@ | |||
1 | #include <linux/kernel.h> | 1 | #include <linux/kernel.h> |
2 | #include <linux/init.h> | 2 | #include <linux/init.h> |
3 | #include <linux/memblock.h> | ||
3 | 4 | ||
4 | #include <asm/setup.h> | 5 | #include <asm/setup.h> |
5 | #include <asm/bios_ebda.h> | 6 | #include <asm/bios_ebda.h> |
@@ -51,5 +52,5 @@ void __init reserve_ebda_region(void) | |||
51 | lowmem = 0x9f000; | 52 | lowmem = 0x9f000; |
52 | 53 | ||
53 | /* reserve all memory between lowmem and the 1MB mark */ | 54 | /* reserve all memory between lowmem and the 1MB mark */ |
54 | reserve_early_overlap_ok(lowmem, 0x100000, "BIOS reserved"); | 55 | memblock_x86_reserve_range(lowmem, 0x100000, "* BIOS reserved"); |
55 | } | 56 | } |
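memblock_x86_reserve_range() takes an end address rather than a length, unlike the core memblock_reserve(). Its body is not among the hunks shown here, so the following is only a plausible sketch of what such a wrapper does, built from the memblock_reserve()/memblock_dbg() calls that are visible elsewhere in this patch:

void __init memblock_x86_reserve_range(u64 start, u64 end, char *name)
{
	if (start == end)
		return;

	/* Sketch: log the named range, then hand it to the core allocator. */
	memblock_dbg("    memblock_x86_reserve_range: [%010llx-%010llx] %s\n",
		     start, end - 1, name);
	memblock_reserve(start, end - start);
}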
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index 784360c0625c..9a6ca2392170 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c | |||
@@ -8,6 +8,7 @@ | |||
8 | #include <linux/init.h> | 8 | #include <linux/init.h> |
9 | #include <linux/start_kernel.h> | 9 | #include <linux/start_kernel.h> |
10 | #include <linux/mm.h> | 10 | #include <linux/mm.h> |
11 | #include <linux/memblock.h> | ||
11 | 12 | ||
12 | #include <asm/setup.h> | 13 | #include <asm/setup.h> |
13 | #include <asm/sections.h> | 14 | #include <asm/sections.h> |
@@ -30,17 +31,18 @@ static void __init i386_default_early_setup(void) | |||
30 | 31 | ||
31 | void __init i386_start_kernel(void) | 32 | void __init i386_start_kernel(void) |
32 | { | 33 | { |
34 | memblock_init(); | ||
35 | |||
33 | #ifdef CONFIG_X86_TRAMPOLINE | 36 | #ifdef CONFIG_X86_TRAMPOLINE |
34 | /* | 37 | /* |
35 | * But first pinch a few for the stack/trampoline stuff | 38 | * But first pinch a few for the stack/trampoline stuff |
36 | * FIXME: Don't need the extra page at 4K, but need to fix | 39 | * FIXME: Don't need the extra page at 4K, but need to fix |
37 | * trampoline before removing it. (see the GDT stuff) | 40 | * trampoline before removing it. (see the GDT stuff) |
38 | */ | 41 | */ |
39 | reserve_early_overlap_ok(PAGE_SIZE, PAGE_SIZE + PAGE_SIZE, | 42 | memblock_x86_reserve_range(PAGE_SIZE, PAGE_SIZE + PAGE_SIZE, "EX TRAMPOLINE"); |
40 | "EX TRAMPOLINE"); | ||
41 | #endif | 43 | #endif |
42 | 44 | ||
43 | reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS"); | 45 | memblock_x86_reserve_range(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS"); |
44 | 46 | ||
45 | #ifdef CONFIG_BLK_DEV_INITRD | 47 | #ifdef CONFIG_BLK_DEV_INITRD |
46 | /* Reserve INITRD */ | 48 | /* Reserve INITRD */ |
@@ -49,7 +51,7 @@ void __init i386_start_kernel(void) | |||
49 | u64 ramdisk_image = boot_params.hdr.ramdisk_image; | 51 | u64 ramdisk_image = boot_params.hdr.ramdisk_image; |
50 | u64 ramdisk_size = boot_params.hdr.ramdisk_size; | 52 | u64 ramdisk_size = boot_params.hdr.ramdisk_size; |
51 | u64 ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size); | 53 | u64 ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size); |
52 | reserve_early(ramdisk_image, ramdisk_end, "RAMDISK"); | 54 | memblock_x86_reserve_range(ramdisk_image, ramdisk_end, "RAMDISK"); |
53 | } | 55 | } |
54 | #endif | 56 | #endif |
55 | 57 | ||
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 7147143fd614..2d2673c28aff 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c | |||
@@ -12,6 +12,7 @@ | |||
12 | #include <linux/percpu.h> | 12 | #include <linux/percpu.h> |
13 | #include <linux/start_kernel.h> | 13 | #include <linux/start_kernel.h> |
14 | #include <linux/io.h> | 14 | #include <linux/io.h> |
15 | #include <linux/memblock.h> | ||
15 | 16 | ||
16 | #include <asm/processor.h> | 17 | #include <asm/processor.h> |
17 | #include <asm/proto.h> | 18 | #include <asm/proto.h> |
@@ -79,6 +80,8 @@ void __init x86_64_start_kernel(char * real_mode_data) | |||
79 | /* Cleanup the over mapped high alias */ | 80 | /* Cleanup the over mapped high alias */ |
80 | cleanup_highmap(); | 81 | cleanup_highmap(); |
81 | 82 | ||
83 | max_pfn_mapped = KERNEL_IMAGE_SIZE >> PAGE_SHIFT; | ||
84 | |||
82 | for (i = 0; i < NUM_EXCEPTION_VECTORS; i++) { | 85 | for (i = 0; i < NUM_EXCEPTION_VECTORS; i++) { |
83 | #ifdef CONFIG_EARLY_PRINTK | 86 | #ifdef CONFIG_EARLY_PRINTK |
84 | set_intr_gate(i, &early_idt_handlers[i]); | 87 | set_intr_gate(i, &early_idt_handlers[i]); |
@@ -98,7 +101,9 @@ void __init x86_64_start_reservations(char *real_mode_data) | |||
98 | { | 101 | { |
99 | copy_bootdata(__va(real_mode_data)); | 102 | copy_bootdata(__va(real_mode_data)); |
100 | 103 | ||
101 | reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS"); | 104 | memblock_init(); |
105 | |||
106 | memblock_x86_reserve_range(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS"); | ||
102 | 107 | ||
103 | #ifdef CONFIG_BLK_DEV_INITRD | 108 | #ifdef CONFIG_BLK_DEV_INITRD |
104 | /* Reserve INITRD */ | 109 | /* Reserve INITRD */ |
@@ -107,7 +112,7 @@ void __init x86_64_start_reservations(char *real_mode_data) | |||
107 | unsigned long ramdisk_image = boot_params.hdr.ramdisk_image; | 112 | unsigned long ramdisk_image = boot_params.hdr.ramdisk_image; |
108 | unsigned long ramdisk_size = boot_params.hdr.ramdisk_size; | 113 | unsigned long ramdisk_size = boot_params.hdr.ramdisk_size; |
109 | unsigned long ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size); | 114 | unsigned long ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size); |
110 | reserve_early(ramdisk_image, ramdisk_end, "RAMDISK"); | 115 | memblock_x86_reserve_range(ramdisk_image, ramdisk_end, "RAMDISK"); |
111 | } | 116 | } |
112 | #endif | 117 | #endif |
113 | 118 | ||
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index d7b6f7fb4fec..9af64d9c4b67 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c | |||
@@ -11,6 +11,7 @@ | |||
11 | #include <linux/init.h> | 11 | #include <linux/init.h> |
12 | #include <linux/delay.h> | 12 | #include <linux/delay.h> |
13 | #include <linux/bootmem.h> | 13 | #include <linux/bootmem.h> |
14 | #include <linux/memblock.h> | ||
14 | #include <linux/kernel_stat.h> | 15 | #include <linux/kernel_stat.h> |
15 | #include <linux/mc146818rtc.h> | 16 | #include <linux/mc146818rtc.h> |
16 | #include <linux/bitops.h> | 17 | #include <linux/bitops.h> |
@@ -657,7 +658,7 @@ static void __init smp_reserve_memory(struct mpf_intel *mpf) | |||
657 | { | 658 | { |
658 | unsigned long size = get_mpc_size(mpf->physptr); | 659 | unsigned long size = get_mpc_size(mpf->physptr); |
659 | 660 | ||
660 | reserve_early_overlap_ok(mpf->physptr, mpf->physptr+size, "MP-table mpc"); | 661 | memblock_x86_reserve_range(mpf->physptr, mpf->physptr+size, "* MP-table mpc"); |
661 | } | 662 | } |
662 | 663 | ||
663 | static int __init smp_scan_config(unsigned long base, unsigned long length) | 664 | static int __init smp_scan_config(unsigned long base, unsigned long length) |
@@ -686,7 +687,7 @@ static int __init smp_scan_config(unsigned long base, unsigned long length) | |||
686 | mpf, (u64)virt_to_phys(mpf)); | 687 | mpf, (u64)virt_to_phys(mpf)); |
687 | 688 | ||
688 | mem = virt_to_phys(mpf); | 689 | mem = virt_to_phys(mpf); |
689 | reserve_early_overlap_ok(mem, mem + sizeof(*mpf), "MP-table mpf"); | 690 | memblock_x86_reserve_range(mem, mem + sizeof(*mpf), "* MP-table mpf"); |
690 | if (mpf->physptr) | 691 | if (mpf->physptr) |
691 | smp_reserve_memory(mpf); | 692 | smp_reserve_memory(mpf); |
692 | 693 | ||
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 078d4ec1a9d9..f56a117cef68 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c | |||
@@ -47,6 +47,7 @@ | |||
47 | #include <asm/rio.h> | 47 | #include <asm/rio.h> |
48 | #include <asm/bios_ebda.h> | 48 | #include <asm/bios_ebda.h> |
49 | #include <asm/x86_init.h> | 49 | #include <asm/x86_init.h> |
50 | #include <asm/iommu_table.h> | ||
50 | 51 | ||
51 | #ifdef CONFIG_CALGARY_IOMMU_ENABLED_BY_DEFAULT | 52 | #ifdef CONFIG_CALGARY_IOMMU_ENABLED_BY_DEFAULT |
52 | int use_calgary __read_mostly = 1; | 53 | int use_calgary __read_mostly = 1; |
@@ -1364,7 +1365,7 @@ static int __init calgary_iommu_init(void) | |||
1364 | return 0; | 1365 | return 0; |
1365 | } | 1366 | } |
1366 | 1367 | ||
1367 | void __init detect_calgary(void) | 1368 | int __init detect_calgary(void) |
1368 | { | 1369 | { |
1369 | int bus; | 1370 | int bus; |
1370 | void *tbl; | 1371 | void *tbl; |
@@ -1378,13 +1379,13 @@ void __init detect_calgary(void) | |||
1378 | * another HW IOMMU already, bail out. | 1379 | * another HW IOMMU already, bail out. |
1379 | */ | 1380 | */ |
1380 | if (no_iommu || iommu_detected) | 1381 | if (no_iommu || iommu_detected) |
1381 | return; | 1382 | return -ENODEV; |
1382 | 1383 | ||
1383 | if (!use_calgary) | 1384 | if (!use_calgary) |
1384 | return; | 1385 | return -ENODEV; |
1385 | 1386 | ||
1386 | if (!early_pci_allowed()) | 1387 | if (!early_pci_allowed()) |
1387 | return; | 1388 | return -ENODEV; |
1388 | 1389 | ||
1389 | printk(KERN_DEBUG "Calgary: detecting Calgary via BIOS EBDA area\n"); | 1390 | printk(KERN_DEBUG "Calgary: detecting Calgary via BIOS EBDA area\n"); |
1390 | 1391 | ||
@@ -1410,13 +1411,13 @@ void __init detect_calgary(void) | |||
1410 | if (!rio_table_hdr) { | 1411 | if (!rio_table_hdr) { |
1411 | printk(KERN_DEBUG "Calgary: Unable to locate Rio Grande table " | 1412 | printk(KERN_DEBUG "Calgary: Unable to locate Rio Grande table " |
1412 | "in EBDA - bailing!\n"); | 1413 | "in EBDA - bailing!\n"); |
1413 | return; | 1414 | return -ENODEV; |
1414 | } | 1415 | } |
1415 | 1416 | ||
1416 | ret = build_detail_arrays(); | 1417 | ret = build_detail_arrays(); |
1417 | if (ret) { | 1418 | if (ret) { |
1418 | printk(KERN_DEBUG "Calgary: build_detail_arrays ret %d\n", ret); | 1419 | printk(KERN_DEBUG "Calgary: build_detail_arrays ret %d\n", ret); |
1419 | return; | 1420 | return -ENOMEM; |
1420 | } | 1421 | } |
1421 | 1422 | ||
1422 | specified_table_size = determine_tce_table_size((is_kdump_kernel() ? | 1423 | specified_table_size = determine_tce_table_size((is_kdump_kernel() ? |
@@ -1464,7 +1465,7 @@ void __init detect_calgary(void) | |||
1464 | 1465 | ||
1465 | x86_init.iommu.iommu_init = calgary_iommu_init; | 1466 | x86_init.iommu.iommu_init = calgary_iommu_init; |
1466 | } | 1467 | } |
1467 | return; | 1468 | return calgary_found; |
1468 | 1469 | ||
1469 | cleanup: | 1470 | cleanup: |
1470 | for (--bus; bus >= 0; --bus) { | 1471 | for (--bus; bus >= 0; --bus) { |
@@ -1473,6 +1474,7 @@ cleanup: | |||
1473 | if (info->tce_space) | 1474 | if (info->tce_space) |
1474 | free_tce_table(info->tce_space); | 1475 | free_tce_table(info->tce_space); |
1475 | } | 1476 | } |
1477 | return -ENOMEM; | ||
1476 | } | 1478 | } |
1477 | 1479 | ||
1478 | static int __init calgary_parse_options(char *p) | 1480 | static int __init calgary_parse_options(char *p) |
@@ -1594,3 +1596,5 @@ static int __init calgary_fixup_tce_spaces(void) | |||
1594 | * and before device_initcall. | 1596 | * and before device_initcall. |
1595 | */ | 1597 | */ |
1596 | rootfs_initcall(calgary_fixup_tce_spaces); | 1598 | rootfs_initcall(calgary_fixup_tce_spaces); |
1599 | |||
1600 | IOMMU_INIT_POST(detect_calgary); | ||
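With detect_calgary() now returning an int, the convention is: a positive value means the IOMMU was found (and pci_iommu_alloc() will mark its table entry detected), while zero or a negative errno means it is absent. A hedged sketch of a detection routine following that convention; my_iommu_present() and my_iommu_init() are hypothetical placeholders:

int __init detect_my_iommu(void)
{
	/* Bail out if some other hardware IOMMU already claimed the machine. */
	if (no_iommu || iommu_detected)
		return -ENODEV;

	if (!my_iommu_present())		/* hypothetical probe */
		return -ENODEV;

	iommu_detected = 1;
	x86_init.iommu.iommu_init = my_iommu_init;	/* hypothetical init hook */
	return 1;				/* > 0: detected */
}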
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 9f07cfcbd3a5..9ea999a4dcc1 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c | |||
@@ -11,9 +11,8 @@ | |||
11 | #include <asm/iommu.h> | 11 | #include <asm/iommu.h> |
12 | #include <asm/gart.h> | 12 | #include <asm/gart.h> |
13 | #include <asm/calgary.h> | 13 | #include <asm/calgary.h> |
14 | #include <asm/amd_iommu.h> | ||
15 | #include <asm/x86_init.h> | 14 | #include <asm/x86_init.h> |
16 | #include <asm/xen/swiotlb-xen.h> | 15 | #include <asm/iommu_table.h> |
17 | 16 | ||
18 | static int forbid_dac __read_mostly; | 17 | static int forbid_dac __read_mostly; |
19 | 18 | ||
@@ -45,6 +44,8 @@ int iommu_detected __read_mostly = 0; | |||
45 | */ | 44 | */ |
46 | int iommu_pass_through __read_mostly; | 45 | int iommu_pass_through __read_mostly; |
47 | 46 | ||
47 | extern struct iommu_table_entry __iommu_table[], __iommu_table_end[]; | ||
48 | |||
48 | /* Dummy device used for NULL arguments (normally ISA). */ | 49 | /* Dummy device used for NULL arguments (normally ISA). */ |
49 | struct device x86_dma_fallback_dev = { | 50 | struct device x86_dma_fallback_dev = { |
50 | .init_name = "fallback device", | 51 | .init_name = "fallback device", |
@@ -130,26 +131,24 @@ static void __init dma32_free_bootmem(void) | |||
130 | 131 | ||
131 | void __init pci_iommu_alloc(void) | 132 | void __init pci_iommu_alloc(void) |
132 | { | 133 | { |
134 | struct iommu_table_entry *p; | ||
135 | |||
133 | /* free the range so iommu could get some range less than 4G */ | 136 | /* free the range so iommu could get some range less than 4G */ |
134 | dma32_free_bootmem(); | 137 | dma32_free_bootmem(); |
135 | 138 | ||
136 | if (pci_xen_swiotlb_detect() || pci_swiotlb_detect()) | 139 | sort_iommu_table(__iommu_table, __iommu_table_end); |
137 | goto out; | 140 | check_iommu_entries(__iommu_table, __iommu_table_end); |
138 | |||
139 | gart_iommu_hole_init(); | ||
140 | |||
141 | detect_calgary(); | ||
142 | |||
143 | detect_intel_iommu(); | ||
144 | 141 | ||
145 | /* needs to be called after gart_iommu_hole_init */ | 142 | for (p = __iommu_table; p < __iommu_table_end; p++) { |
146 | amd_iommu_detect(); | 143 | if (p && p->detect && p->detect() > 0) { |
147 | out: | 144 | p->flags |= IOMMU_DETECTED; |
148 | pci_xen_swiotlb_init(); | 145 | if (p->early_init) |
149 | 146 | p->early_init(); | |
150 | pci_swiotlb_init(); | 147 | if (p->flags & IOMMU_FINISH_IF_DETECTED) |
148 | break; | ||
149 | } | ||
150 | } | ||
151 | } | 151 | } |
152 | |||
153 | void *dma_generic_alloc_coherent(struct device *dev, size_t size, | 152 | void *dma_generic_alloc_coherent(struct device *dev, size_t size, |
154 | dma_addr_t *dma_addr, gfp_t flag) | 153 | dma_addr_t *dma_addr, gfp_t flag) |
155 | { | 154 | { |
@@ -292,6 +291,7 @@ EXPORT_SYMBOL(dma_supported); | |||
292 | 291 | ||
293 | static int __init pci_iommu_init(void) | 292 | static int __init pci_iommu_init(void) |
294 | { | 293 | { |
294 | struct iommu_table_entry *p; | ||
295 | dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES); | 295 | dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES); |
296 | 296 | ||
297 | #ifdef CONFIG_PCI | 297 | #ifdef CONFIG_PCI |
@@ -299,12 +299,10 @@ static int __init pci_iommu_init(void) | |||
299 | #endif | 299 | #endif |
300 | x86_init.iommu.iommu_init(); | 300 | x86_init.iommu.iommu_init(); |
301 | 301 | ||
302 | if (swiotlb || xen_swiotlb) { | 302 | for (p = __iommu_table; p < __iommu_table_end; p++) { |
303 | printk(KERN_INFO "PCI-DMA: " | 303 | if (p && (p->flags & IOMMU_DETECTED) && p->late_init) |
304 | "Using software bounce buffering for IO (SWIOTLB)\n"); | 304 | p->late_init(); |
305 | swiotlb_print_info(); | 305 | } |
306 | } else | ||
307 | swiotlb_free(); | ||
308 | 306 | ||
309 | return 0; | 307 | return 0; |
310 | } | 308 | } |
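pci_iommu_alloc() and pci_iommu_init() only touch a handful of fields of struct iommu_table_entry. The real definition lives in the new <asm/iommu_table.h>, which is not part of this hunk; a layout consistent with the loops above would look roughly like:

struct iommu_table_entry {
	initcall_t	detect;		/* returns > 0 when this IOMMU is present     */
	initcall_t	depend;		/* ->detect of the entry this one must follow */
	void		(*early_init)(void);	/* run right after successful detection */
	void		(*late_init)(void);	/* run from pci_iommu_init()            */
	int		flags;		/* IOMMU_DETECTED, IOMMU_FINISH_IF_DETECTED   */
};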
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index c562207b1b3d..ba0f0ca9f280 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c | |||
@@ -41,6 +41,7 @@ | |||
41 | #include <asm/dma.h> | 41 | #include <asm/dma.h> |
42 | #include <asm/amd_nb.h> | 42 | #include <asm/amd_nb.h> |
43 | #include <asm/x86_init.h> | 43 | #include <asm/x86_init.h> |
44 | #include <asm/iommu_table.h> | ||
44 | 45 | ||
45 | static unsigned long iommu_bus_base; /* GART remapping area (physical) */ | 46 | static unsigned long iommu_bus_base; /* GART remapping area (physical) */ |
46 | static unsigned long iommu_size; /* size of remapping area bytes */ | 47 | static unsigned long iommu_size; /* size of remapping area bytes */ |
@@ -905,3 +906,4 @@ void __init gart_parse_options(char *p) | |||
905 | } | 906 | } |
906 | } | 907 | } |
907 | } | 908 | } |
909 | IOMMU_INIT_POST(gart_iommu_hole_init); | ||
diff --git a/arch/x86/kernel/pci-iommu_table.c b/arch/x86/kernel/pci-iommu_table.c new file mode 100644 index 000000000000..55d745ec1181 --- /dev/null +++ b/arch/x86/kernel/pci-iommu_table.c | |||
@@ -0,0 +1,89 @@ | |||
1 | #include <linux/dma-mapping.h> | ||
2 | #include <asm/iommu_table.h> | ||
3 | #include <linux/string.h> | ||
4 | #include <linux/kallsyms.h> | ||
5 | |||
6 | |||
7 | #define DEBUG 1 | ||
8 | |||
9 | static struct iommu_table_entry * __init | ||
10 | find_dependents_of(struct iommu_table_entry *start, | ||
11 | struct iommu_table_entry *finish, | ||
12 | struct iommu_table_entry *q) | ||
13 | { | ||
14 | struct iommu_table_entry *p; | ||
15 | |||
16 | if (!q) | ||
17 | return NULL; | ||
18 | |||
19 | for (p = start; p < finish; p++) | ||
20 | if (p->detect == q->depend) | ||
21 | return p; | ||
22 | |||
23 | return NULL; | ||
24 | } | ||
25 | |||
26 | |||
27 | void __init sort_iommu_table(struct iommu_table_entry *start, | ||
28 | struct iommu_table_entry *finish) { | ||
29 | |||
30 | struct iommu_table_entry *p, *q, tmp; | ||
31 | |||
32 | for (p = start; p < finish; p++) { | ||
33 | again: | ||
34 | q = find_dependents_of(start, finish, p); | ||
35 | /* We are a bit sneaky here. We use the memory address to figure | ||
36 | * out if the node we depend on is past our point; if so, swap. | ||
37 | */ | ||
38 | if (q > p) { | ||
39 | tmp = *p; | ||
40 | memmove(p, q, sizeof(*p)); | ||
41 | *q = tmp; | ||
42 | goto again; | ||
43 | } | ||
44 | } | ||
45 | |||
46 | } | ||
47 | |||
48 | #ifdef DEBUG | ||
49 | void __init check_iommu_entries(struct iommu_table_entry *start, | ||
50 | struct iommu_table_entry *finish) | ||
51 | { | ||
52 | struct iommu_table_entry *p, *q, *x; | ||
53 | char sym_p[KSYM_SYMBOL_LEN]; | ||
54 | char sym_q[KSYM_SYMBOL_LEN]; | ||
55 | |||
56 | /* Simple cyclic dependency checker. */ | ||
57 | for (p = start; p < finish; p++) { | ||
58 | q = find_dependents_of(start, finish, p); | ||
59 | x = find_dependents_of(start, finish, q); | ||
60 | if (p == x) { | ||
61 | sprint_symbol(sym_p, (unsigned long)p->detect); | ||
62 | sprint_symbol(sym_q, (unsigned long)q->detect); | ||
63 | |||
64 | printk(KERN_ERR "CYCLIC DEPENDENCY FOUND! %s depends" \ | ||
65 | " on %s and vice-versa. BREAKING IT.\n", | ||
66 | sym_p, sym_q); | ||
67 | /* Heavy handed way..*/ | ||
68 | x->depend = 0; | ||
69 | } | ||
70 | } | ||
71 | |||
72 | for (p = start; p < finish; p++) { | ||
73 | q = find_dependents_of(p, finish, p); | ||
74 | if (q && q > p) { | ||
75 | sprint_symbol(sym_p, (unsigned long)p->detect); | ||
76 | sprint_symbol(sym_q, (unsigned long)q->detect); | ||
77 | |||
78 | printk(KERN_ERR "EXECUTION ORDER INVALID! %s "\ | ||
79 | "should be called before %s!\n", | ||
80 | sym_p, sym_q); | ||
81 | } | ||
82 | } | ||
83 | } | ||
84 | #else | ||
85 | inline void check_iommu_entries(struct iommu_table_entry *start, | ||
86 | struct iommu_table_entry *finish) | ||
87 | { | ||
88 | } | ||
89 | #endif | ||
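The sort in sort_iommu_table() is a simple "move my dependency in front of me" pass over the array. A self-contained userspace toy (simplified struct, string names instead of function pointers; not kernel code) shows the effect:

#include <stdio.h>
#include <string.h>

struct entry { const char *name; const char *depends_on; };

static struct entry *find_dependency(struct entry *start, struct entry *finish,
				     struct entry *q)
{
	struct entry *p;

	for (p = start; p < finish; p++)
		if (q->depends_on && strcmp(p->name, q->depends_on) == 0)
			return p;
	return NULL;
}

int main(void)
{
	struct entry table[] = {
		{ "swiotlb_4gb",      "swiotlb_override" },
		{ "gart",             NULL               },
		{ "swiotlb_override", NULL               },
	};
	struct entry *start = table, *finish = table + 3, *p, *q, tmp;

	for (p = start; p < finish; p++) {
again:
		q = find_dependency(start, finish, p);
		if (q && q > p) {	/* our dependency sits after us: swap */
			tmp = *p;
			memmove(p, q, sizeof(*p));
			*q = tmp;
			goto again;
		}
	}

	for (p = start; p < finish; p++)
		printf("%s\n", p->name);  /* prints: swiotlb_override, gart, swiotlb_4gb */
	return 0;
}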
diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c index a5bc528d4328..8f972cbddef0 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c | |||
@@ -10,7 +10,8 @@ | |||
10 | #include <asm/iommu.h> | 10 | #include <asm/iommu.h> |
11 | #include <asm/swiotlb.h> | 11 | #include <asm/swiotlb.h> |
12 | #include <asm/dma.h> | 12 | #include <asm/dma.h> |
13 | 13 | #include <asm/xen/swiotlb-xen.h> | |
14 | #include <asm/iommu_table.h> | ||
14 | int swiotlb __read_mostly; | 15 | int swiotlb __read_mostly; |
15 | 16 | ||
16 | static void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size, | 17 | static void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size, |
@@ -41,25 +42,42 @@ static struct dma_map_ops swiotlb_dma_ops = { | |||
41 | }; | 42 | }; |
42 | 43 | ||
43 | /* | 44 | /* |
44 | * pci_swiotlb_detect - set swiotlb to 1 if necessary | 45 | * pci_swiotlb_detect_override - set swiotlb to 1 if necessary |
45 | * | 46 | * |
46 | * This returns non-zero if we are forced to use swiotlb (by the boot | 47 | * This returns non-zero if we are forced to use swiotlb (by the boot |
47 | * option). | 48 | * option). |
48 | */ | 49 | */ |
49 | int __init pci_swiotlb_detect(void) | 50 | int __init pci_swiotlb_detect_override(void) |
50 | { | 51 | { |
51 | int use_swiotlb = swiotlb | swiotlb_force; | 52 | int use_swiotlb = swiotlb | swiotlb_force; |
52 | 53 | ||
54 | if (swiotlb_force) | ||
55 | swiotlb = 1; | ||
56 | |||
57 | return use_swiotlb; | ||
58 | } | ||
59 | IOMMU_INIT_FINISH(pci_swiotlb_detect_override, | ||
60 | pci_xen_swiotlb_detect, | ||
61 | pci_swiotlb_init, | ||
62 | pci_swiotlb_late_init); | ||
63 | |||
64 | /* | ||
65 | * If 4GB or more is detected (and iommu=off is not set), return 1 | ||
66 | * and set swiotlb to 1. | ||
67 | */ | ||
68 | int __init pci_swiotlb_detect_4gb(void) | ||
69 | { | ||
53 | /* don't initialize swiotlb if iommu=off (no_iommu=1) */ | 70 | /* don't initialize swiotlb if iommu=off (no_iommu=1) */ |
54 | #ifdef CONFIG_X86_64 | 71 | #ifdef CONFIG_X86_64 |
55 | if (!no_iommu && max_pfn > MAX_DMA32_PFN) | 72 | if (!no_iommu && max_pfn > MAX_DMA32_PFN) |
56 | swiotlb = 1; | 73 | swiotlb = 1; |
57 | #endif | 74 | #endif |
58 | if (swiotlb_force) | 75 | return swiotlb; |
59 | swiotlb = 1; | ||
60 | |||
61 | return use_swiotlb; | ||
62 | } | 76 | } |
77 | IOMMU_INIT(pci_swiotlb_detect_4gb, | ||
78 | pci_swiotlb_detect_override, | ||
79 | pci_swiotlb_init, | ||
80 | pci_swiotlb_late_init); | ||
63 | 81 | ||
64 | void __init pci_swiotlb_init(void) | 82 | void __init pci_swiotlb_init(void) |
65 | { | 83 | { |
@@ -68,3 +86,15 @@ void __init pci_swiotlb_init(void) | |||
68 | dma_ops = &swiotlb_dma_ops; | 86 | dma_ops = &swiotlb_dma_ops; |
69 | } | 87 | } |
70 | } | 88 | } |
89 | |||
90 | void __init pci_swiotlb_late_init(void) | ||
91 | { | ||
92 | /* An IOMMU turned us off. */ | ||
93 | if (!swiotlb) | ||
94 | swiotlb_free(); | ||
95 | else { | ||
96 | printk(KERN_INFO "PCI-DMA: " | ||
97 | "Using software bounce buffering for IO (SWIOTLB)\n"); | ||
98 | swiotlb_print_info(); | ||
99 | } | ||
100 | } | ||
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index a59f6a6df5e2..420e64197850 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c | |||
@@ -31,6 +31,7 @@ | |||
31 | #include <linux/apm_bios.h> | 31 | #include <linux/apm_bios.h> |
32 | #include <linux/initrd.h> | 32 | #include <linux/initrd.h> |
33 | #include <linux/bootmem.h> | 33 | #include <linux/bootmem.h> |
34 | #include <linux/memblock.h> | ||
34 | #include <linux/seq_file.h> | 35 | #include <linux/seq_file.h> |
35 | #include <linux/console.h> | 36 | #include <linux/console.h> |
36 | #include <linux/mca.h> | 37 | #include <linux/mca.h> |
@@ -301,7 +302,7 @@ static inline void init_gbpages(void) | |||
301 | static void __init reserve_brk(void) | 302 | static void __init reserve_brk(void) |
302 | { | 303 | { |
303 | if (_brk_end > _brk_start) | 304 | if (_brk_end > _brk_start) |
304 | reserve_early(__pa(_brk_start), __pa(_brk_end), "BRK"); | 305 | memblock_x86_reserve_range(__pa(_brk_start), __pa(_brk_end), "BRK"); |
305 | 306 | ||
306 | /* Mark brk area as locked down and no longer taking any | 307 | /* Mark brk area as locked down and no longer taking any |
307 | new allocations */ | 308 | new allocations */ |
@@ -323,17 +324,16 @@ static void __init relocate_initrd(void) | |||
323 | char *p, *q; | 324 | char *p, *q; |
324 | 325 | ||
325 | /* We need to move the initrd down into lowmem */ | 326 | /* We need to move the initrd down into lowmem */ |
326 | ramdisk_here = find_e820_area(0, end_of_lowmem, area_size, | 327 | ramdisk_here = memblock_find_in_range(0, end_of_lowmem, area_size, |
327 | PAGE_SIZE); | 328 | PAGE_SIZE); |
328 | 329 | ||
329 | if (ramdisk_here == -1ULL) | 330 | if (ramdisk_here == MEMBLOCK_ERROR) |
330 | panic("Cannot find place for new RAMDISK of size %lld\n", | 331 | panic("Cannot find place for new RAMDISK of size %lld\n", |
331 | ramdisk_size); | 332 | ramdisk_size); |
332 | 333 | ||
333 | /* Note: this includes all the lowmem currently occupied by | 334 | /* Note: this includes all the lowmem currently occupied by |
334 | the initrd, we rely on that fact to keep the data intact. */ | 335 | the initrd, we rely on that fact to keep the data intact. */ |
335 | reserve_early(ramdisk_here, ramdisk_here + area_size, | 336 | memblock_x86_reserve_range(ramdisk_here, ramdisk_here + area_size, "NEW RAMDISK"); |
336 | "NEW RAMDISK"); | ||
337 | initrd_start = ramdisk_here + PAGE_OFFSET; | 337 | initrd_start = ramdisk_here + PAGE_OFFSET; |
338 | initrd_end = initrd_start + ramdisk_size; | 338 | initrd_end = initrd_start + ramdisk_size; |
339 | printk(KERN_INFO "Allocated new RAMDISK: %08llx - %08llx\n", | 339 | printk(KERN_INFO "Allocated new RAMDISK: %08llx - %08llx\n", |
@@ -389,7 +389,7 @@ static void __init reserve_initrd(void) | |||
389 | initrd_start = 0; | 389 | initrd_start = 0; |
390 | 390 | ||
391 | if (ramdisk_size >= (end_of_lowmem>>1)) { | 391 | if (ramdisk_size >= (end_of_lowmem>>1)) { |
392 | free_early(ramdisk_image, ramdisk_end); | 392 | memblock_x86_free_range(ramdisk_image, ramdisk_end); |
393 | printk(KERN_ERR "initrd too large to handle, " | 393 | printk(KERN_ERR "initrd too large to handle, " |
394 | "disabling initrd\n"); | 394 | "disabling initrd\n"); |
395 | return; | 395 | return; |
@@ -412,7 +412,7 @@ static void __init reserve_initrd(void) | |||
412 | 412 | ||
413 | relocate_initrd(); | 413 | relocate_initrd(); |
414 | 414 | ||
415 | free_early(ramdisk_image, ramdisk_end); | 415 | memblock_x86_free_range(ramdisk_image, ramdisk_end); |
416 | } | 416 | } |
417 | #else | 417 | #else |
418 | static void __init reserve_initrd(void) | 418 | static void __init reserve_initrd(void) |
@@ -468,7 +468,7 @@ static void __init e820_reserve_setup_data(void) | |||
468 | e820_print_map("reserve setup_data"); | 468 | e820_print_map("reserve setup_data"); |
469 | } | 469 | } |
470 | 470 | ||
471 | static void __init reserve_early_setup_data(void) | 471 | static void __init memblock_x86_reserve_range_setup_data(void) |
472 | { | 472 | { |
473 | struct setup_data *data; | 473 | struct setup_data *data; |
474 | u64 pa_data; | 474 | u64 pa_data; |
@@ -480,7 +480,7 @@ static void __init reserve_early_setup_data(void) | |||
480 | while (pa_data) { | 480 | while (pa_data) { |
481 | data = early_memremap(pa_data, sizeof(*data)); | 481 | data = early_memremap(pa_data, sizeof(*data)); |
482 | sprintf(buf, "setup data %x", data->type); | 482 | sprintf(buf, "setup data %x", data->type); |
483 | reserve_early(pa_data, pa_data+sizeof(*data)+data->len, buf); | 483 | memblock_x86_reserve_range(pa_data, pa_data+sizeof(*data)+data->len, buf); |
484 | pa_data = data->next; | 484 | pa_data = data->next; |
485 | early_iounmap(data, sizeof(*data)); | 485 | early_iounmap(data, sizeof(*data)); |
486 | } | 486 | } |
@@ -501,6 +501,7 @@ static inline unsigned long long get_total_mem(void) | |||
501 | return total << PAGE_SHIFT; | 501 | return total << PAGE_SHIFT; |
502 | } | 502 | } |
503 | 503 | ||
504 | #define DEFAULT_BZIMAGE_ADDR_MAX 0x37FFFFFF | ||
504 | static void __init reserve_crashkernel(void) | 505 | static void __init reserve_crashkernel(void) |
505 | { | 506 | { |
506 | unsigned long long total_mem; | 507 | unsigned long long total_mem; |
@@ -518,23 +519,27 @@ static void __init reserve_crashkernel(void) | |||
518 | if (crash_base <= 0) { | 519 | if (crash_base <= 0) { |
519 | const unsigned long long alignment = 16<<20; /* 16M */ | 520 | const unsigned long long alignment = 16<<20; /* 16M */ |
520 | 521 | ||
521 | crash_base = find_e820_area(alignment, ULONG_MAX, crash_size, | 522 | /* |
522 | alignment); | 523 | * kexec wants the bzImage to be below DEFAULT_BZIMAGE_ADDR_MAX |
523 | if (crash_base == -1ULL) { | 524 | */ |
525 | crash_base = memblock_find_in_range(alignment, | ||
526 | DEFAULT_BZIMAGE_ADDR_MAX, crash_size, alignment); | ||
527 | |||
528 | if (crash_base == MEMBLOCK_ERROR) { | ||
524 | pr_info("crashkernel reservation failed - No suitable area found.\n"); | 529 | pr_info("crashkernel reservation failed - No suitable area found.\n"); |
525 | return; | 530 | return; |
526 | } | 531 | } |
527 | } else { | 532 | } else { |
528 | unsigned long long start; | 533 | unsigned long long start; |
529 | 534 | ||
530 | start = find_e820_area(crash_base, ULONG_MAX, crash_size, | 535 | start = memblock_find_in_range(crash_base, |
531 | 1<<20); | 536 | crash_base + crash_size, crash_size, 1<<20); |
532 | if (start != crash_base) { | 537 | if (start != crash_base) { |
533 | pr_info("crashkernel reservation failed - memory is in use.\n"); | 538 | pr_info("crashkernel reservation failed - memory is in use.\n"); |
534 | return; | 539 | return; |
535 | } | 540 | } |
536 | } | 541 | } |
537 | reserve_early(crash_base, crash_base + crash_size, "CRASH KERNEL"); | 542 | memblock_x86_reserve_range(crash_base, crash_base + crash_size, "CRASH KERNEL"); |
538 | 543 | ||
539 | printk(KERN_INFO "Reserving %ldMB of memory at %ldMB " | 544 | printk(KERN_INFO "Reserving %ldMB of memory at %ldMB " |
540 | "for crashkernel (System RAM: %ldMB)\n", | 545 | "for crashkernel (System RAM: %ldMB)\n", |
@@ -614,7 +619,7 @@ static __init void reserve_ibft_region(void) | |||
614 | addr = find_ibft_region(&size); | 619 | addr = find_ibft_region(&size); |
615 | 620 | ||
616 | if (size) | 621 | if (size) |
617 | reserve_early_overlap_ok(addr, addr + size, "ibft"); | 622 | memblock_x86_reserve_range(addr, addr + size, "* ibft"); |
618 | } | 623 | } |
619 | 624 | ||
620 | static unsigned reserve_low = CONFIG_X86_RESERVE_LOW << 10; | 625 | static unsigned reserve_low = CONFIG_X86_RESERVE_LOW << 10; |
@@ -664,6 +669,15 @@ static int __init parse_reservelow(char *p) | |||
664 | 669 | ||
665 | early_param("reservelow", parse_reservelow); | 670 | early_param("reservelow", parse_reservelow); |
666 | 671 | ||
672 | static u64 __init get_max_mapped(void) | ||
673 | { | ||
674 | u64 end = max_pfn_mapped; | ||
675 | |||
676 | end <<= PAGE_SHIFT; | ||
677 | |||
678 | return end; | ||
679 | } | ||
680 | |||
667 | /* | 681 | /* |
668 | * Determine if we were loaded by an EFI loader. If so, then we have also been | 682 | * Determine if we were loaded by an EFI loader. If so, then we have also been |
669 | * passed the efi memmap, systab, etc., so we should use these data structures | 683 | * passed the efi memmap, systab, etc., so we should use these data structures |
@@ -738,7 +752,7 @@ void __init setup_arch(char **cmdline_p) | |||
738 | #endif | 752 | #endif |
739 | 4)) { | 753 | 4)) { |
740 | efi_enabled = 1; | 754 | efi_enabled = 1; |
741 | efi_reserve_early(); | 755 | efi_memblock_x86_reserve_range(); |
742 | } | 756 | } |
743 | #endif | 757 | #endif |
744 | 758 | ||
@@ -795,7 +809,7 @@ void __init setup_arch(char **cmdline_p) | |||
795 | x86_report_nx(); | 809 | x86_report_nx(); |
796 | 810 | ||
797 | /* after early param, so could get panic from serial */ | 811 | /* after early param, so could get panic from serial */ |
798 | reserve_early_setup_data(); | 812 | memblock_x86_reserve_range_setup_data(); |
799 | 813 | ||
800 | if (acpi_mps_check()) { | 814 | if (acpi_mps_check()) { |
801 | #ifdef CONFIG_X86_LOCAL_APIC | 815 | #ifdef CONFIG_X86_LOCAL_APIC |
@@ -848,8 +862,6 @@ void __init setup_arch(char **cmdline_p) | |||
848 | */ | 862 | */ |
849 | max_pfn = e820_end_of_ram_pfn(); | 863 | max_pfn = e820_end_of_ram_pfn(); |
850 | 864 | ||
851 | /* preallocate 4k for mptable mpc */ | ||
852 | early_reserve_e820_mpc_new(); | ||
853 | /* update e820 for memory not covered by WB MTRRs */ | 865 | /* update e820 for memory not covered by WB MTRRs */ |
854 | mtrr_bp_init(); | 866 | mtrr_bp_init(); |
855 | if (mtrr_trim_uncached_memory(max_pfn)) | 867 | if (mtrr_trim_uncached_memory(max_pfn)) |
@@ -871,18 +883,8 @@ void __init setup_arch(char **cmdline_p) | |||
871 | max_low_pfn = max_pfn; | 883 | max_low_pfn = max_pfn; |
872 | 884 | ||
873 | high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1; | 885 | high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1; |
874 | max_pfn_mapped = KERNEL_IMAGE_SIZE >> PAGE_SHIFT; | ||
875 | #endif | ||
876 | |||
877 | #ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION | ||
878 | setup_bios_corruption_check(); | ||
879 | #endif | 886 | #endif |
880 | 887 | ||
881 | printk(KERN_DEBUG "initial memory mapped : 0 - %08lx\n", | ||
882 | max_pfn_mapped<<PAGE_SHIFT); | ||
883 | |||
884 | reserve_brk(); | ||
885 | |||
886 | /* | 888 | /* |
887 | * Find and reserve possible boot-time SMP configuration: | 889 | * Find and reserve possible boot-time SMP configuration: |
888 | */ | 890 | */ |
@@ -890,6 +892,26 @@ void __init setup_arch(char **cmdline_p) | |||
890 | 892 | ||
891 | reserve_ibft_region(); | 893 | reserve_ibft_region(); |
892 | 894 | ||
895 | /* | ||
896 | * Need to conclude brk before memblock_x86_fill(); later code | ||
897 | * could use memblock_find_in_range, which could overlap with | ||
898 | * the brk area. | ||
899 | */ | ||
900 | reserve_brk(); | ||
901 | |||
902 | memblock.current_limit = get_max_mapped(); | ||
903 | memblock_x86_fill(); | ||
904 | |||
905 | /* preallocate 4k for mptable mpc */ | ||
906 | early_reserve_e820_mpc_new(); | ||
907 | |||
908 | #ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION | ||
909 | setup_bios_corruption_check(); | ||
910 | #endif | ||
911 | |||
912 | printk(KERN_DEBUG "initial memory mapped : 0 - %08lx\n", | ||
913 | max_pfn_mapped<<PAGE_SHIFT); | ||
914 | |||
893 | reserve_trampoline_memory(); | 915 | reserve_trampoline_memory(); |
894 | 916 | ||
895 | #ifdef CONFIG_ACPI_SLEEP | 917 | #ifdef CONFIG_ACPI_SLEEP |
@@ -913,6 +935,7 @@ void __init setup_arch(char **cmdline_p) | |||
913 | max_low_pfn = max_pfn; | 935 | max_low_pfn = max_pfn; |
914 | } | 936 | } |
915 | #endif | 937 | #endif |
938 | memblock.current_limit = get_max_mapped(); | ||
916 | 939 | ||
917 | /* | 940 | /* |
918 | * NOTE: On x86-32, only from this point on, fixmaps are ready for use. | 941 | * NOTE: On x86-32, only from this point on, fixmaps are ready for use. |
@@ -951,10 +974,7 @@ void __init setup_arch(char **cmdline_p) | |||
951 | #endif | 974 | #endif |
952 | 975 | ||
953 | initmem_init(0, max_pfn, acpi, k8); | 976 | initmem_init(0, max_pfn, acpi, k8); |
954 | #ifndef CONFIG_NO_BOOTMEM | 977 | memblock_find_dma_reserve(); |
955 | early_res_to_bootmem(0, max_low_pfn<<PAGE_SHIFT); | ||
956 | #endif | ||
957 | |||
958 | dma32_reserve_bootmem(); | 978 | dma32_reserve_bootmem(); |
959 | 979 | ||
960 | #ifdef CONFIG_KVM_CLOCK | 980 | #ifdef CONFIG_KVM_CLOCK |
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 2335c15c93a4..002b79685f73 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c | |||
@@ -131,13 +131,7 @@ static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align) | |||
131 | 131 | ||
132 | static void __init pcpu_fc_free(void *ptr, size_t size) | 132 | static void __init pcpu_fc_free(void *ptr, size_t size) |
133 | { | 133 | { |
134 | #ifdef CONFIG_NO_BOOTMEM | ||
135 | u64 start = __pa(ptr); | ||
136 | u64 end = start + size; | ||
137 | free_early_partial(start, end); | ||
138 | #else | ||
139 | free_bootmem(__pa(ptr), size); | 134 | free_bootmem(__pa(ptr), size); |
140 | #endif | ||
141 | } | 135 | } |
142 | 136 | ||
143 | static int __init pcpu_cpu_distance(unsigned int from, unsigned int to) | 137 | static int __init pcpu_cpu_distance(unsigned int from, unsigned int to) |
diff --git a/arch/x86/kernel/trampoline.c b/arch/x86/kernel/trampoline.c index e2a595257390..4c3da5674e67 100644 --- a/arch/x86/kernel/trampoline.c +++ b/arch/x86/kernel/trampoline.c | |||
@@ -1,8 +1,8 @@ | |||
1 | #include <linux/io.h> | 1 | #include <linux/io.h> |
2 | #include <linux/memblock.h> | ||
2 | 3 | ||
3 | #include <asm/trampoline.h> | 4 | #include <asm/trampoline.h> |
4 | #include <asm/pgtable.h> | 5 | #include <asm/pgtable.h> |
5 | #include <asm/e820.h> | ||
6 | 6 | ||
7 | #if defined(CONFIG_X86_64) && defined(CONFIG_ACPI_SLEEP) | 7 | #if defined(CONFIG_X86_64) && defined(CONFIG_ACPI_SLEEP) |
8 | #define __trampinit | 8 | #define __trampinit |
@@ -17,15 +17,15 @@ unsigned char *__trampinitdata trampoline_base; | |||
17 | 17 | ||
18 | void __init reserve_trampoline_memory(void) | 18 | void __init reserve_trampoline_memory(void) |
19 | { | 19 | { |
20 | unsigned long mem; | 20 | phys_addr_t mem; |
21 | 21 | ||
22 | /* Has to be in very low memory so we can execute real-mode AP code. */ | 22 | /* Has to be in very low memory so we can execute real-mode AP code. */ |
23 | mem = find_e820_area(0, 1<<20, TRAMPOLINE_SIZE, PAGE_SIZE); | 23 | mem = memblock_find_in_range(0, 1<<20, TRAMPOLINE_SIZE, PAGE_SIZE); |
24 | if (mem == -1L) | 24 | if (mem == MEMBLOCK_ERROR) |
25 | panic("Cannot allocate trampoline\n"); | 25 | panic("Cannot allocate trampoline\n"); |
26 | 26 | ||
27 | trampoline_base = __va(mem); | 27 | trampoline_base = __va(mem); |
28 | reserve_early(mem, mem + TRAMPOLINE_SIZE, "TRAMPOLINE"); | 28 | memblock_x86_reserve_range(mem, mem + TRAMPOLINE_SIZE, "TRAMPOLINE"); |
29 | } | 29 | } |
30 | 30 | ||
31 | /* | 31 | /* |
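The trampoline change above is the general replacement for the old find_e820_area()/reserve_early() pair: find a free range with memblock_find_in_range(), check for MEMBLOCK_ERROR, then reserve it under a label. A generic sketch of the pattern, where MY_AREA_SIZE and the label are hypothetical:

	phys_addr_t addr;

	/* Search below 1MB, page aligned; MEMBLOCK_ERROR signals no fit. */
	addr = memblock_find_in_range(0, 1<<20, MY_AREA_SIZE, PAGE_SIZE);
	if (addr == MEMBLOCK_ERROR)
		panic("Cannot allocate my area\n");

	memblock_x86_reserve_range(addr, addr + MY_AREA_SIZE, "MY AREA");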
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index d0bb52296fa3..38e2b67807e1 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S | |||
@@ -242,6 +242,12 @@ SECTIONS | |||
242 | __x86_cpu_dev_end = .; | 242 | __x86_cpu_dev_end = .; |
243 | } | 243 | } |
244 | 244 | ||
245 | /* | ||
246 | * start address and size of operations which during runtime | ||
247 | * can be patched with virtualization friendly instructions or | ||
248 | * baremetal native ones. Think page table operations. | ||
249 | * Details in paravirt_types.h | ||
250 | */ | ||
245 | . = ALIGN(8); | 251 | . = ALIGN(8); |
246 | .parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) { | 252 | .parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) { |
247 | __parainstructions = .; | 253 | __parainstructions = .; |
@@ -249,6 +255,11 @@ SECTIONS | |||
249 | __parainstructions_end = .; | 255 | __parainstructions_end = .; |
250 | } | 256 | } |
251 | 257 | ||
258 | /* | ||
259 | * struct alt_inst entries. From the header (alternative.h): | ||
260 | * "Alternative instructions for different CPU types or capabilities" | ||
261 | * Think locking instructions on spinlocks. | ||
262 | */ | ||
252 | . = ALIGN(8); | 263 | . = ALIGN(8); |
253 | .altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) { | 264 | .altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) { |
254 | __alt_instructions = .; | 265 | __alt_instructions = .; |
@@ -256,11 +267,28 @@ SECTIONS | |||
256 | __alt_instructions_end = .; | 267 | __alt_instructions_end = .; |
257 | } | 268 | } |
258 | 269 | ||
270 | /* | ||
271 | * And here are the replacement instructions. The linker sticks | ||
272 | * them as binary blobs. The .altinstructions has enough data to | ||
273 | * get the address and the length of them to patch the kernel safely. | ||
274 | */ | ||
259 | .altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET) { | 275 | .altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET) { |
260 | *(.altinstr_replacement) | 276 | *(.altinstr_replacement) |
261 | } | 277 | } |
262 | 278 | ||
263 | /* | 279 | /* |
280 | * struct iommu_table_entry entries are injected in this section. | ||
281 | * It is an array of IOMMU entries which at run time gets sorted | ||
282 | * according to dependency order. After rootfs_initcall is complete | ||
283 | * this section can be safely removed. | ||
284 | */ | ||
285 | .iommu_table : AT(ADDR(.iommu_table) - LOAD_OFFSET) { | ||
286 | __iommu_table = .; | ||
287 | *(.iommu_table) | ||
288 | __iommu_table_end = .; | ||
289 | } | ||
290 | . = ALIGN(8); | ||
291 | /* | ||
264 | * .exit.text is discard at runtime, not link time, to deal with | 292 | * .exit.text is discard at runtime, not link time, to deal with |
265 | * references from .altinstructions and .eh_frame | 293 | * references from .altinstructions and .eh_frame |
266 | */ | 294 | */ |
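The .iommu_table section collected here is filled by the IOMMU_INIT*() macros used in the IOMMU drivers above. Their exact definitions live in <asm/iommu_table.h> and are not part of this hunk; a plausible expansion, shown only as a sketch, is a static struct placed into that section:

#define __IOMMU_INIT(_detect, _depend, _early_init, _late_init, _finish)	\
	static const struct iommu_table_entry					\
		__iommu_entry_##_detect __used					\
		__attribute__ ((__section__(".iommu_table"),			\
				aligned(sizeof(void *)))) = {			\
		.detect		= _detect,					\
		.depend		= _depend,					\
		.early_init	= _early_init,					\
		.late_init	= _late_init,					\
		.flags		= _finish ? IOMMU_FINISH_IF_DETECTED : 0,	\
	}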
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index a4c768397baa..55543397a8a7 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile | |||
@@ -26,4 +26,6 @@ obj-$(CONFIG_NUMA) += numa.o numa_$(BITS).o | |||
26 | obj-$(CONFIG_K8_NUMA) += k8topology_64.o | 26 | obj-$(CONFIG_K8_NUMA) += k8topology_64.o |
27 | obj-$(CONFIG_ACPI_NUMA) += srat_$(BITS).o | 27 | obj-$(CONFIG_ACPI_NUMA) += srat_$(BITS).o |
28 | 28 | ||
29 | obj-$(CONFIG_HAVE_MEMBLOCK) += memblock.o | ||
30 | |||
29 | obj-$(CONFIG_MEMTEST) += memtest.o | 31 | obj-$(CONFIG_MEMTEST) += memtest.o |
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index b278535b14aa..c0e28a13de7d 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c | |||
@@ -2,6 +2,7 @@ | |||
2 | #include <linux/initrd.h> | 2 | #include <linux/initrd.h> |
3 | #include <linux/ioport.h> | 3 | #include <linux/ioport.h> |
4 | #include <linux/swap.h> | 4 | #include <linux/swap.h> |
5 | #include <linux/memblock.h> | ||
5 | 6 | ||
6 | #include <asm/cacheflush.h> | 7 | #include <asm/cacheflush.h> |
7 | #include <asm/e820.h> | 8 | #include <asm/e820.h> |
@@ -33,6 +34,7 @@ static void __init find_early_table_space(unsigned long end, int use_pse, | |||
33 | int use_gbpages) | 34 | int use_gbpages) |
34 | { | 35 | { |
35 | unsigned long puds, pmds, ptes, tables, start; | 36 | unsigned long puds, pmds, ptes, tables, start; |
37 | phys_addr_t base; | ||
36 | 38 | ||
37 | puds = (end + PUD_SIZE - 1) >> PUD_SHIFT; | 39 | puds = (end + PUD_SIZE - 1) >> PUD_SHIFT; |
38 | tables = roundup(puds * sizeof(pud_t), PAGE_SIZE); | 40 | tables = roundup(puds * sizeof(pud_t), PAGE_SIZE); |
@@ -75,12 +77,12 @@ static void __init find_early_table_space(unsigned long end, int use_pse, | |||
75 | #else | 77 | #else |
76 | start = 0x8000; | 78 | start = 0x8000; |
77 | #endif | 79 | #endif |
78 | e820_table_start = find_e820_area(start, max_pfn_mapped<<PAGE_SHIFT, | 80 | base = memblock_find_in_range(start, max_pfn_mapped<<PAGE_SHIFT, |
79 | tables, PAGE_SIZE); | 81 | tables, PAGE_SIZE); |
80 | if (e820_table_start == -1UL) | 82 | if (base == MEMBLOCK_ERROR) |
81 | panic("Cannot find space for the kernel page tables"); | 83 | panic("Cannot find space for the kernel page tables"); |
82 | 84 | ||
83 | e820_table_start >>= PAGE_SHIFT; | 85 | e820_table_start = base >> PAGE_SHIFT; |
84 | e820_table_end = e820_table_start; | 86 | e820_table_end = e820_table_start; |
85 | e820_table_top = e820_table_start + (tables >> PAGE_SHIFT); | 87 | e820_table_top = e820_table_start + (tables >> PAGE_SHIFT); |
86 | 88 | ||
@@ -299,7 +301,7 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, | |||
299 | __flush_tlb_all(); | 301 | __flush_tlb_all(); |
300 | 302 | ||
301 | if (!after_bootmem && e820_table_end > e820_table_start) | 303 | if (!after_bootmem && e820_table_end > e820_table_start) |
302 | reserve_early(e820_table_start << PAGE_SHIFT, | 304 | memblock_x86_reserve_range(e820_table_start << PAGE_SHIFT, |
303 | e820_table_end << PAGE_SHIFT, "PGTABLE"); | 305 | e820_table_end << PAGE_SHIFT, "PGTABLE"); |
304 | 306 | ||
305 | if (!after_bootmem) | 307 | if (!after_bootmem) |
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 558f2d332076..5d0a6711c282 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c | |||
@@ -25,6 +25,7 @@ | |||
25 | #include <linux/pfn.h> | 25 | #include <linux/pfn.h> |
26 | #include <linux/poison.h> | 26 | #include <linux/poison.h> |
27 | #include <linux/bootmem.h> | 27 | #include <linux/bootmem.h> |
28 | #include <linux/memblock.h> | ||
28 | #include <linux/proc_fs.h> | 29 | #include <linux/proc_fs.h> |
29 | #include <linux/memory_hotplug.h> | 30 | #include <linux/memory_hotplug.h> |
30 | #include <linux/initrd.h> | 31 | #include <linux/initrd.h> |
@@ -422,49 +423,28 @@ static void __init add_one_highpage_init(struct page *page) | |||
422 | totalhigh_pages++; | 423 | totalhigh_pages++; |
423 | } | 424 | } |
424 | 425 | ||
425 | struct add_highpages_data { | 426 | void __init add_highpages_with_active_regions(int nid, |
426 | unsigned long start_pfn; | 427 | unsigned long start_pfn, unsigned long end_pfn) |
427 | unsigned long end_pfn; | ||
428 | }; | ||
429 | |||
430 | static int __init add_highpages_work_fn(unsigned long start_pfn, | ||
431 | unsigned long end_pfn, void *datax) | ||
432 | { | 428 | { |
433 | int node_pfn; | 429 | struct range *range; |
434 | struct page *page; | 430 | int nr_range; |
435 | unsigned long final_start_pfn, final_end_pfn; | 431 | int i; |
436 | struct add_highpages_data *data; | ||
437 | 432 | ||
438 | data = (struct add_highpages_data *)datax; | 433 | nr_range = __get_free_all_memory_range(&range, nid, start_pfn, end_pfn); |
439 | 434 | ||
440 | final_start_pfn = max(start_pfn, data->start_pfn); | 435 | for (i = 0; i < nr_range; i++) { |
441 | final_end_pfn = min(end_pfn, data->end_pfn); | 436 | struct page *page; |
442 | if (final_start_pfn >= final_end_pfn) | 437 | int node_pfn; |
443 | return 0; | ||
444 | 438 | ||
445 | for (node_pfn = final_start_pfn; node_pfn < final_end_pfn; | 439 | for (node_pfn = range[i].start; node_pfn < range[i].end; |
446 | node_pfn++) { | 440 | node_pfn++) { |
447 | if (!pfn_valid(node_pfn)) | 441 | if (!pfn_valid(node_pfn)) |
448 | continue; | 442 | continue; |
449 | page = pfn_to_page(node_pfn); | 443 | page = pfn_to_page(node_pfn); |
450 | add_one_highpage_init(page); | 444 | add_one_highpage_init(page); |
445 | } | ||
451 | } | 446 | } |
452 | |||
453 | return 0; | ||
454 | |||
455 | } | 447 | } |
456 | |||
457 | void __init add_highpages_with_active_regions(int nid, unsigned long start_pfn, | ||
458 | unsigned long end_pfn) | ||
459 | { | ||
460 | struct add_highpages_data data; | ||
461 | |||
462 | data.start_pfn = start_pfn; | ||
463 | data.end_pfn = end_pfn; | ||
464 | |||
465 | work_with_active_regions(nid, add_highpages_work_fn, &data); | ||
466 | } | ||
467 | |||
468 | #else | 448 | #else |
469 | static inline void permanent_kmaps_init(pgd_t *pgd_base) | 449 | static inline void permanent_kmaps_init(pgd_t *pgd_base) |
470 | { | 450 | { |
@@ -712,14 +692,14 @@ void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn, | |||
712 | highstart_pfn = highend_pfn = max_pfn; | 692 | highstart_pfn = highend_pfn = max_pfn; |
713 | if (max_pfn > max_low_pfn) | 693 | if (max_pfn > max_low_pfn) |
714 | highstart_pfn = max_low_pfn; | 694 | highstart_pfn = max_low_pfn; |
715 | e820_register_active_regions(0, 0, highend_pfn); | 695 | memblock_x86_register_active_regions(0, 0, highend_pfn); |
716 | sparse_memory_present_with_active_regions(0); | 696 | sparse_memory_present_with_active_regions(0); |
717 | printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", | 697 | printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", |
718 | pages_to_mb(highend_pfn - highstart_pfn)); | 698 | pages_to_mb(highend_pfn - highstart_pfn)); |
719 | num_physpages = highend_pfn; | 699 | num_physpages = highend_pfn; |
720 | high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1; | 700 | high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1; |
721 | #else | 701 | #else |
722 | e820_register_active_regions(0, 0, max_low_pfn); | 702 | memblock_x86_register_active_regions(0, 0, max_low_pfn); |
723 | sparse_memory_present_with_active_regions(0); | 703 | sparse_memory_present_with_active_regions(0); |
724 | num_physpages = max_low_pfn; | 704 | num_physpages = max_low_pfn; |
725 | high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1; | 705 | high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1; |
@@ -750,68 +730,12 @@ static void __init zone_sizes_init(void) | |||
750 | free_area_init_nodes(max_zone_pfns); | 730 | free_area_init_nodes(max_zone_pfns); |
751 | } | 731 | } |
752 | 732 | ||
753 | #ifndef CONFIG_NO_BOOTMEM | ||
754 | static unsigned long __init setup_node_bootmem(int nodeid, | ||
755 | unsigned long start_pfn, | ||
756 | unsigned long end_pfn, | ||
757 | unsigned long bootmap) | ||
758 | { | ||
759 | unsigned long bootmap_size; | ||
760 | |||
761 | /* don't touch min_low_pfn */ | ||
762 | bootmap_size = init_bootmem_node(NODE_DATA(nodeid), | ||
763 | bootmap >> PAGE_SHIFT, | ||
764 | start_pfn, end_pfn); | ||
765 | printk(KERN_INFO " node %d low ram: %08lx - %08lx\n", | ||
766 | nodeid, start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT); | ||
767 | printk(KERN_INFO " node %d bootmap %08lx - %08lx\n", | ||
768 | nodeid, bootmap, bootmap + bootmap_size); | ||
769 | free_bootmem_with_active_regions(nodeid, end_pfn); | ||
770 | |||
771 | return bootmap + bootmap_size; | ||
772 | } | ||
773 | #endif | ||
774 | |||
775 | void __init setup_bootmem_allocator(void) | 733 | void __init setup_bootmem_allocator(void) |
776 | { | 734 | { |
777 | #ifndef CONFIG_NO_BOOTMEM | ||
778 | int nodeid; | ||
779 | unsigned long bootmap_size, bootmap; | ||
780 | /* | ||
781 | * Initialize the boot-time allocator (with low memory only): | ||
782 | */ | ||
783 | bootmap_size = bootmem_bootmap_pages(max_low_pfn)<<PAGE_SHIFT; | ||
784 | bootmap = find_e820_area(0, max_pfn_mapped<<PAGE_SHIFT, bootmap_size, | ||
785 | PAGE_SIZE); | ||
786 | if (bootmap == -1L) | ||
787 | panic("Cannot find bootmem map of size %ld\n", bootmap_size); | ||
788 | reserve_early(bootmap, bootmap + bootmap_size, "BOOTMAP"); | ||
789 | #endif | ||
790 | |||
791 | printk(KERN_INFO " mapped low ram: 0 - %08lx\n", | 735 | printk(KERN_INFO " mapped low ram: 0 - %08lx\n", |
792 | max_pfn_mapped<<PAGE_SHIFT); | 736 | max_pfn_mapped<<PAGE_SHIFT); |
793 | printk(KERN_INFO " low ram: 0 - %08lx\n", max_low_pfn<<PAGE_SHIFT); | 737 | printk(KERN_INFO " low ram: 0 - %08lx\n", max_low_pfn<<PAGE_SHIFT); |
794 | 738 | ||
795 | #ifndef CONFIG_NO_BOOTMEM | ||
796 | for_each_online_node(nodeid) { | ||
797 | unsigned long start_pfn, end_pfn; | ||
798 | |||
799 | #ifdef CONFIG_NEED_MULTIPLE_NODES | ||
800 | start_pfn = node_start_pfn[nodeid]; | ||
801 | end_pfn = node_end_pfn[nodeid]; | ||
802 | if (start_pfn > max_low_pfn) | ||
803 | continue; | ||
804 | if (end_pfn > max_low_pfn) | ||
805 | end_pfn = max_low_pfn; | ||
806 | #else | ||
807 | start_pfn = 0; | ||
808 | end_pfn = max_low_pfn; | ||
809 | #endif | ||
810 | bootmap = setup_node_bootmem(nodeid, start_pfn, end_pfn, | ||
811 | bootmap); | ||
812 | } | ||
813 | #endif | ||
814 | |||
815 | after_bootmem = 1; | 739 | after_bootmem = 1; |
816 | } | 740 | } |
817 | 741 | ||
@@ -1070,8 +994,3 @@ void mark_rodata_ro(void) | |||
1070 | } | 994 | } |
1071 | #endif | 995 | #endif |
1072 | 996 | ||
1073 | int __init reserve_bootmem_generic(unsigned long phys, unsigned long len, | ||
1074 | int flags) | ||
1075 | { | ||
1076 | return reserve_bootmem(phys, len, flags); | ||
1077 | } | ||
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index c55f900fbf89..84346200e783 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c | |||
@@ -21,6 +21,7 @@ | |||
21 | #include <linux/initrd.h> | 21 | #include <linux/initrd.h> |
22 | #include <linux/pagemap.h> | 22 | #include <linux/pagemap.h> |
23 | #include <linux/bootmem.h> | 23 | #include <linux/bootmem.h> |
24 | #include <linux/memblock.h> | ||
24 | #include <linux/proc_fs.h> | 25 | #include <linux/proc_fs.h> |
25 | #include <linux/pci.h> | 26 | #include <linux/pci.h> |
26 | #include <linux/pfn.h> | 27 | #include <linux/pfn.h> |
@@ -52,8 +53,6 @@ | |||
52 | #include <asm/init.h> | 53 | #include <asm/init.h> |
53 | #include <linux/bootmem.h> | 54 | #include <linux/bootmem.h> |
54 | 55 | ||
55 | static unsigned long dma_reserve __initdata; | ||
56 | |||
57 | static int __init parse_direct_gbpages_off(char *arg) | 56 | static int __init parse_direct_gbpages_off(char *arg) |
58 | { | 57 | { |
59 | direct_gbpages = 0; | 58 | direct_gbpages = 0; |
@@ -617,23 +616,7 @@ kernel_physical_mapping_init(unsigned long start, | |||
617 | void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn, | 616 | void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn, |
618 | int acpi, int k8) | 617 | int acpi, int k8) |
619 | { | 618 | { |
620 | #ifndef CONFIG_NO_BOOTMEM | 619 | memblock_x86_register_active_regions(0, start_pfn, end_pfn); |
621 | unsigned long bootmap_size, bootmap; | ||
622 | |||
623 | bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT; | ||
624 | bootmap = find_e820_area(0, end_pfn<<PAGE_SHIFT, bootmap_size, | ||
625 | PAGE_SIZE); | ||
626 | if (bootmap == -1L) | ||
627 | panic("Cannot find bootmem map of size %ld\n", bootmap_size); | ||
628 | reserve_early(bootmap, bootmap + bootmap_size, "BOOTMAP"); | ||
629 | /* don't touch min_low_pfn */ | ||
630 | bootmap_size = init_bootmem_node(NODE_DATA(0), bootmap >> PAGE_SHIFT, | ||
631 | 0, end_pfn); | ||
632 | e820_register_active_regions(0, start_pfn, end_pfn); | ||
633 | free_bootmem_with_active_regions(0, end_pfn); | ||
634 | #else | ||
635 | e820_register_active_regions(0, start_pfn, end_pfn); | ||
636 | #endif | ||
637 | } | 620 | } |
638 | #endif | 621 | #endif |
639 | 622 | ||
@@ -843,52 +826,6 @@ void mark_rodata_ro(void) | |||
843 | 826 | ||
844 | #endif | 827 | #endif |
845 | 828 | ||
846 | int __init reserve_bootmem_generic(unsigned long phys, unsigned long len, | ||
847 | int flags) | ||
848 | { | ||
849 | #ifdef CONFIG_NUMA | ||
850 | int nid, next_nid; | ||
851 | int ret; | ||
852 | #endif | ||
853 | unsigned long pfn = phys >> PAGE_SHIFT; | ||
854 | |||
855 | if (pfn >= max_pfn) { | ||
856 | /* | ||
857 | * This can happen with kdump kernels when accessing | ||
858 | * firmware tables: | ||
859 | */ | ||
860 | if (pfn < max_pfn_mapped) | ||
861 | return -EFAULT; | ||
862 | |||
863 | printk(KERN_ERR "reserve_bootmem: illegal reserve %lx %lu\n", | ||
864 | phys, len); | ||
865 | return -EFAULT; | ||
866 | } | ||
867 | |||
868 | /* Should check here against the e820 map to avoid double free */ | ||
869 | #ifdef CONFIG_NUMA | ||
870 | nid = phys_to_nid(phys); | ||
871 | next_nid = phys_to_nid(phys + len - 1); | ||
872 | if (nid == next_nid) | ||
873 | ret = reserve_bootmem_node(NODE_DATA(nid), phys, len, flags); | ||
874 | else | ||
875 | ret = reserve_bootmem(phys, len, flags); | ||
876 | |||
877 | if (ret != 0) | ||
878 | return ret; | ||
879 | |||
880 | #else | ||
881 | reserve_bootmem(phys, len, flags); | ||
882 | #endif | ||
883 | |||
884 | if (phys+len <= MAX_DMA_PFN*PAGE_SIZE) { | ||
885 | dma_reserve += len / PAGE_SIZE; | ||
886 | set_dma_reserve(dma_reserve); | ||
887 | } | ||
888 | |||
889 | return 0; | ||
890 | } | ||
891 | |||
892 | int kern_addr_valid(unsigned long addr) | 829 | int kern_addr_valid(unsigned long addr) |
893 | { | 830 | { |
894 | unsigned long above = ((long)addr) >> __VIRTUAL_MASK_SHIFT; | 831 | unsigned long above = ((long)addr) >> __VIRTUAL_MASK_SHIFT; |
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 3ba6e0608c55..0369843511dc 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c | |||
@@ -362,6 +362,11 @@ static inline pte_t * __init early_ioremap_pte(unsigned long addr) | |||
362 | return &bm_pte[pte_index(addr)]; | 362 | return &bm_pte[pte_index(addr)]; |
363 | } | 363 | } |
364 | 364 | ||
365 | bool __init is_early_ioremap_ptep(pte_t *ptep) | ||
366 | { | ||
367 | return ptep >= &bm_pte[0] && ptep < &bm_pte[PAGE_SIZE/sizeof(pte_t)]; | ||
368 | } | ||
369 | |||
365 | static unsigned long slot_virt[FIX_BTMAPS_SLOTS] __initdata; | 370 | static unsigned long slot_virt[FIX_BTMAPS_SLOTS] __initdata; |
366 | 371 | ||
367 | void __init early_ioremap_init(void) | 372 | void __init early_ioremap_init(void) |
diff --git a/arch/x86/mm/k8topology_64.c b/arch/x86/mm/k8topology_64.c index 52d54bfc1ebb..804a3b6c6e14 100644 --- a/arch/x86/mm/k8topology_64.c +++ b/arch/x86/mm/k8topology_64.c | |||
@@ -11,6 +11,8 @@ | |||
11 | #include <linux/string.h> | 11 | #include <linux/string.h> |
12 | #include <linux/module.h> | 12 | #include <linux/module.h> |
13 | #include <linux/nodemask.h> | 13 | #include <linux/nodemask.h> |
14 | #include <linux/memblock.h> | ||
15 | |||
14 | #include <asm/io.h> | 16 | #include <asm/io.h> |
15 | #include <linux/pci_ids.h> | 17 | #include <linux/pci_ids.h> |
16 | #include <linux/acpi.h> | 18 | #include <linux/acpi.h> |
@@ -222,7 +224,7 @@ int __init k8_scan_nodes(void) | |||
222 | for_each_node_mask(i, node_possible_map) { | 224 | for_each_node_mask(i, node_possible_map) { |
223 | int j; | 225 | int j; |
224 | 226 | ||
225 | e820_register_active_regions(i, | 227 | memblock_x86_register_active_regions(i, |
226 | nodes[i].start >> PAGE_SHIFT, | 228 | nodes[i].start >> PAGE_SHIFT, |
227 | nodes[i].end >> PAGE_SHIFT); | 229 | nodes[i].end >> PAGE_SHIFT); |
228 | for (j = apicid_base; j < cores + apicid_base; j++) | 230 | for (j = apicid_base; j < cores + apicid_base; j++) |
diff --git a/arch/x86/mm/memblock.c b/arch/x86/mm/memblock.c new file mode 100644 index 000000000000..aa1169392b83 --- /dev/null +++ b/arch/x86/mm/memblock.c | |||
@@ -0,0 +1,348 @@ | |||
1 | #include <linux/kernel.h> | ||
2 | #include <linux/types.h> | ||
3 | #include <linux/init.h> | ||
4 | #include <linux/bitops.h> | ||
5 | #include <linux/memblock.h> | ||
6 | #include <linux/bootmem.h> | ||
7 | #include <linux/mm.h> | ||
8 | #include <linux/range.h> | ||
9 | |||
10 | /* Check for already reserved areas */ | ||
11 | static bool __init check_with_memblock_reserved_size(u64 *addrp, u64 *sizep, u64 align) | ||
12 | { | ||
13 | struct memblock_region *r; | ||
14 | u64 addr = *addrp, last; | ||
15 | u64 size = *sizep; | ||
16 | bool changed = false; | ||
17 | |||
18 | again: | ||
19 | last = addr + size; | ||
20 | for_each_memblock(reserved, r) { | ||
21 | if (last > r->base && addr < r->base) { | ||
22 | size = r->base - addr; | ||
23 | changed = true; | ||
24 | goto again; | ||
25 | } | ||
26 | if (last > (r->base + r->size) && addr < (r->base + r->size)) { | ||
27 | addr = round_up(r->base + r->size, align); | ||
28 | size = last - addr; | ||
29 | changed = true; | ||
30 | goto again; | ||
31 | } | ||
32 | if (last <= (r->base + r->size) && addr >= r->base) { | ||
33 | *sizep = 0; | ||
34 | return false; | ||
35 | } | ||
36 | } | ||
37 | if (changed) { | ||
38 | *addrp = addr; | ||
39 | *sizep = size; | ||
40 | } | ||
41 | return changed; | ||
42 | } | ||
43 | |||
44 | /* | ||
45 | * Find the next free range after start; its size is returned in *sizep | ||
46 | */ | ||
47 | u64 __init memblock_x86_find_in_range_size(u64 start, u64 *sizep, u64 align) | ||
48 | { | ||
49 | struct memblock_region *r; | ||
50 | |||
51 | for_each_memblock(memory, r) { | ||
52 | u64 ei_start = r->base; | ||
53 | u64 ei_last = ei_start + r->size; | ||
54 | u64 addr; | ||
55 | |||
56 | addr = round_up(ei_start, align); | ||
57 | if (addr < start) | ||
58 | addr = round_up(start, align); | ||
59 | if (addr >= ei_last) | ||
60 | continue; | ||
61 | *sizep = ei_last - addr; | ||
62 | while (check_with_memblock_reserved_size(&addr, sizep, align)) | ||
63 | ; | ||
64 | |||
65 | if (*sizep) | ||
66 | return addr; | ||
67 | } | ||
68 | |||
69 | return MEMBLOCK_ERROR; | ||
70 | } | ||
71 | |||
72 | static __init struct range *find_range_array(int count) | ||
73 | { | ||
74 | u64 end, size, mem; | ||
75 | struct range *range; | ||
76 | |||
77 | size = sizeof(struct range) * count; | ||
78 | end = memblock.current_limit; | ||
79 | |||
80 | mem = memblock_find_in_range(0, end, size, sizeof(struct range)); | ||
81 | if (mem == MEMBLOCK_ERROR) | ||
82 | panic("cannot find more space for range array"); | ||
83 | |||
84 | /* | ||
85 | * This range is temporary, so don't reserve it; it will not be | ||
86 | * overlapped because we will not allocate a new buffer before | ||
87 | * we discard this one | ||
88 | */ | ||
89 | range = __va(mem); | ||
90 | memset(range, 0, size); | ||
91 | |||
92 | return range; | ||
93 | } | ||
94 | |||
95 | static void __init memblock_x86_subtract_reserved(struct range *range, int az) | ||
96 | { | ||
97 | u64 final_start, final_end; | ||
98 | struct memblock_region *r; | ||
99 | |||
100 | /* Take out the region array itself first */ | ||
101 | memblock_free_reserved_regions(); | ||
102 | |||
103 | memblock_dbg("Subtract (%ld early reservations)\n", memblock.reserved.cnt); | ||
104 | |||
105 | for_each_memblock(reserved, r) { | ||
106 | memblock_dbg(" [%010llx-%010llx]\n", (u64)r->base, (u64)r->base + r->size - 1); | ||
107 | final_start = PFN_DOWN(r->base); | ||
108 | final_end = PFN_UP(r->base + r->size); | ||
109 | if (final_start >= final_end) | ||
110 | continue; | ||
111 | subtract_range(range, az, final_start, final_end); | ||
112 | } | ||
113 | |||
114 | /* Put region array back ? */ | ||
115 | memblock_reserve_reserved_regions(); | ||
116 | } | ||
117 | |||
118 | struct count_data { | ||
119 | int nr; | ||
120 | }; | ||
121 | |||
122 | static int __init count_work_fn(unsigned long start_pfn, | ||
123 | unsigned long end_pfn, void *datax) | ||
124 | { | ||
125 | struct count_data *data = datax; | ||
126 | |||
127 | data->nr++; | ||
128 | |||
129 | return 0; | ||
130 | } | ||
131 | |||
132 | static int __init count_early_node_map(int nodeid) | ||
133 | { | ||
134 | struct count_data data; | ||
135 | |||
136 | data.nr = 0; | ||
137 | work_with_active_regions(nodeid, count_work_fn, &data); | ||
138 | |||
139 | return data.nr; | ||
140 | } | ||
141 | |||
142 | int __init __get_free_all_memory_range(struct range **rangep, int nodeid, | ||
143 | unsigned long start_pfn, unsigned long end_pfn) | ||
144 | { | ||
145 | int count; | ||
146 | struct range *range; | ||
147 | int nr_range; | ||
148 | |||
149 | count = (memblock.reserved.cnt + count_early_node_map(nodeid)) * 2; | ||
150 | |||
151 | range = find_range_array(count); | ||
152 | nr_range = 0; | ||
153 | |||
154 | /* | ||
155 | * Build the range array from early_node_map[] and the | ||
156 | * memblock.reserved regions first | ||
157 | */ | ||
158 | nr_range = add_from_early_node_map(range, count, nr_range, nodeid); | ||
159 | subtract_range(range, count, 0, start_pfn); | ||
160 | subtract_range(range, count, end_pfn, -1ULL); | ||
161 | |||
162 | memblock_x86_subtract_reserved(range, count); | ||
163 | nr_range = clean_sort_range(range, count); | ||
164 | |||
165 | *rangep = range; | ||
166 | return nr_range; | ||
167 | } | ||
168 | |||
169 | int __init get_free_all_memory_range(struct range **rangep, int nodeid) | ||
170 | { | ||
171 | unsigned long end_pfn = -1UL; | ||
172 | |||
173 | #ifdef CONFIG_X86_32 | ||
174 | end_pfn = max_low_pfn; | ||
175 | #endif | ||
176 | return __get_free_all_memory_range(rangep, nodeid, 0, end_pfn); | ||
177 | } | ||
178 | |||
179 | static u64 __init __memblock_x86_memory_in_range(u64 addr, u64 limit, bool get_free) | ||
180 | { | ||
181 | int i, count; | ||
182 | struct range *range; | ||
183 | int nr_range; | ||
184 | u64 final_start, final_end; | ||
185 | u64 free_size; | ||
186 | struct memblock_region *r; | ||
187 | |||
188 | count = (memblock.reserved.cnt + memblock.memory.cnt) * 2; | ||
189 | |||
190 | range = find_range_array(count); | ||
191 | nr_range = 0; | ||
192 | |||
193 | addr = PFN_UP(addr); | ||
194 | limit = PFN_DOWN(limit); | ||
195 | |||
196 | for_each_memblock(memory, r) { | ||
197 | final_start = PFN_UP(r->base); | ||
198 | final_end = PFN_DOWN(r->base + r->size); | ||
199 | if (final_start >= final_end) | ||
200 | continue; | ||
201 | if (final_start >= limit || final_end <= addr) | ||
202 | continue; | ||
203 | |||
204 | nr_range = add_range(range, count, nr_range, final_start, final_end); | ||
205 | } | ||
206 | subtract_range(range, count, 0, addr); | ||
207 | subtract_range(range, count, limit, -1ULL); | ||
208 | |||
209 | /* Subtract the memblock.reserved regions within the range? */ | ||
210 | if (!get_free) | ||
211 | goto sort_and_count_them; | ||
212 | for_each_memblock(reserved, r) { | ||
213 | final_start = PFN_DOWN(r->base); | ||
214 | final_end = PFN_UP(r->base + r->size); | ||
215 | if (final_start >= final_end) | ||
216 | continue; | ||
217 | if (final_start >= limit || final_end <= addr) | ||
218 | continue; | ||
219 | |||
220 | subtract_range(range, count, final_start, final_end); | ||
221 | } | ||
222 | |||
223 | sort_and_count_them: | ||
224 | nr_range = clean_sort_range(range, count); | ||
225 | |||
226 | free_size = 0; | ||
227 | for (i = 0; i < nr_range; i++) | ||
228 | free_size += range[i].end - range[i].start; | ||
229 | |||
230 | return free_size << PAGE_SHIFT; | ||
231 | } | ||
232 | |||
233 | u64 __init memblock_x86_free_memory_in_range(u64 addr, u64 limit) | ||
234 | { | ||
235 | return __memblock_x86_memory_in_range(addr, limit, true); | ||
236 | } | ||
237 | |||
238 | u64 __init memblock_x86_memory_in_range(u64 addr, u64 limit) | ||
239 | { | ||
240 | return __memblock_x86_memory_in_range(addr, limit, false); | ||
241 | } | ||
242 | |||
243 | void __init memblock_x86_reserve_range(u64 start, u64 end, char *name) | ||
244 | { | ||
245 | if (start == end) | ||
246 | return; | ||
247 | |||
248 | if (WARN_ONCE(start > end, "memblock_x86_reserve_range: wrong range [%#llx, %#llx)\n", start, end)) | ||
249 | return; | ||
250 | |||
251 | memblock_dbg(" memblock_x86_reserve_range: [%#010llx-%#010llx] %16s\n", start, end - 1, name); | ||
252 | |||
253 | memblock_reserve(start, end - start); | ||
254 | } | ||
255 | |||
256 | void __init memblock_x86_free_range(u64 start, u64 end) | ||
257 | { | ||
258 | if (start == end) | ||
259 | return; | ||
260 | |||
261 | if (WARN_ONCE(start > end, "memblock_x86_free_range: wrong range [%#llx, %#llx)\n", start, end)) | ||
262 | return; | ||
263 | |||
264 | memblock_dbg(" memblock_x86_free_range: [%#010llx-%#010llx]\n", start, end - 1); | ||
265 | |||
266 | memblock_free(start, end - start); | ||
267 | } | ||
268 | |||
269 | /* | ||
270 | * This must be called after memblock_x86_register_active_regions(), | ||
271 | * so that early_node_map[] is already filled. | ||
272 | */ | ||
273 | u64 __init memblock_x86_find_in_range_node(int nid, u64 start, u64 end, u64 size, u64 align) | ||
274 | { | ||
275 | u64 addr; | ||
276 | addr = find_memory_core_early(nid, size, align, start, end); | ||
277 | if (addr != MEMBLOCK_ERROR) | ||
278 | return addr; | ||
279 | |||
280 | /* Fallback: start and end should already be within the node range */ | ||
281 | return memblock_find_in_range(start, end, size, align); | ||
282 | } | ||
283 | |||
284 | /* | ||
285 | * Finds an active region in the address range from start_pfn to last_pfn and | ||
286 | * returns its range in ei_startpfn and ei_endpfn for the memblock entry. | ||
287 | */ | ||
288 | static int __init memblock_x86_find_active_region(const struct memblock_region *ei, | ||
289 | unsigned long start_pfn, | ||
290 | unsigned long last_pfn, | ||
291 | unsigned long *ei_startpfn, | ||
292 | unsigned long *ei_endpfn) | ||
293 | { | ||
294 | u64 align = PAGE_SIZE; | ||
295 | |||
296 | *ei_startpfn = round_up(ei->base, align) >> PAGE_SHIFT; | ||
297 | *ei_endpfn = round_down(ei->base + ei->size, align) >> PAGE_SHIFT; | ||
298 | |||
299 | /* Skip map entries smaller than a page */ | ||
300 | if (*ei_startpfn >= *ei_endpfn) | ||
301 | return 0; | ||
302 | |||
303 | /* Skip if map is outside the node */ | ||
304 | if (*ei_endpfn <= start_pfn || *ei_startpfn >= last_pfn) | ||
305 | return 0; | ||
306 | |||
307 | /* Check for overlaps */ | ||
308 | if (*ei_startpfn < start_pfn) | ||
309 | *ei_startpfn = start_pfn; | ||
310 | if (*ei_endpfn > last_pfn) | ||
311 | *ei_endpfn = last_pfn; | ||
312 | |||
313 | return 1; | ||
314 | } | ||
315 | |||
316 | /* Walk the memblock.memory map and register active regions within a node */ | ||
317 | void __init memblock_x86_register_active_regions(int nid, unsigned long start_pfn, | ||
318 | unsigned long last_pfn) | ||
319 | { | ||
320 | unsigned long ei_startpfn; | ||
321 | unsigned long ei_endpfn; | ||
322 | struct memblock_region *r; | ||
323 | |||
324 | for_each_memblock(memory, r) | ||
325 | if (memblock_x86_find_active_region(r, start_pfn, last_pfn, | ||
326 | &ei_startpfn, &ei_endpfn)) | ||
327 | add_active_range(nid, ei_startpfn, ei_endpfn); | ||
328 | } | ||
329 | |||
330 | /* | ||
331 | * Find the hole size (in bytes) in the memory range. | ||
332 | * @start: starting address of the memory range to scan | ||
333 | * @end: ending address of the memory range to scan | ||
334 | */ | ||
335 | u64 __init memblock_x86_hole_size(u64 start, u64 end) | ||
336 | { | ||
337 | unsigned long start_pfn = start >> PAGE_SHIFT; | ||
338 | unsigned long last_pfn = end >> PAGE_SHIFT; | ||
339 | unsigned long ei_startpfn, ei_endpfn, ram = 0; | ||
340 | struct memblock_region *r; | ||
341 | |||
342 | for_each_memblock(memory, r) | ||
343 | if (memblock_x86_find_active_region(r, start_pfn, last_pfn, | ||
344 | &ei_startpfn, &ei_endpfn)) | ||
345 | ram += ei_endpfn - ei_startpfn; | ||
346 | |||
347 | return end - start - ((u64)ram << PAGE_SHIFT); | ||
348 | } | ||
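A minimal sketch, assuming only the helpers defined above, of the scan-and-reserve idiom this file enables; the memtest conversion that follows uses the same loop shape. The walk_free_ranges() name and the "EXAMPLE" label are made up for illustration.

    #include <linux/memblock.h>

    /* Sketch: visit each free region in [start, end) and reserve what we use. */
    static void __init walk_free_ranges(u64 start, u64 end)
    {
            u64 size = 0;

            while (start < end) {
                    start = memblock_x86_find_in_range_size(start, &size, PAGE_SIZE);
                    if (start == MEMBLOCK_ERROR || start >= end)
                            break;
                    if (start + size > end)
                            size = end - start;

                    /* ... use [start, start + size) here ... */
                    memblock_x86_reserve_range(start, start + size, "EXAMPLE");

                    start += size;
            }
    }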
diff --git a/arch/x86/mm/memtest.c b/arch/x86/mm/memtest.c index 18d244f70205..92faf3a1c53e 100644 --- a/arch/x86/mm/memtest.c +++ b/arch/x86/mm/memtest.c | |||
@@ -6,8 +6,7 @@ | |||
6 | #include <linux/smp.h> | 6 | #include <linux/smp.h> |
7 | #include <linux/init.h> | 7 | #include <linux/init.h> |
8 | #include <linux/pfn.h> | 8 | #include <linux/pfn.h> |
9 | 9 | #include <linux/memblock.h> | |
10 | #include <asm/e820.h> | ||
11 | 10 | ||
12 | static u64 patterns[] __initdata = { | 11 | static u64 patterns[] __initdata = { |
13 | 0, | 12 | 0, |
@@ -35,7 +34,7 @@ static void __init reserve_bad_mem(u64 pattern, u64 start_bad, u64 end_bad) | |||
35 | (unsigned long long) pattern, | 34 | (unsigned long long) pattern, |
36 | (unsigned long long) start_bad, | 35 | (unsigned long long) start_bad, |
37 | (unsigned long long) end_bad); | 36 | (unsigned long long) end_bad); |
38 | reserve_early(start_bad, end_bad, "BAD RAM"); | 37 | memblock_x86_reserve_range(start_bad, end_bad, "BAD RAM"); |
39 | } | 38 | } |
40 | 39 | ||
41 | static void __init memtest(u64 pattern, u64 start_phys, u64 size) | 40 | static void __init memtest(u64 pattern, u64 start_phys, u64 size) |
@@ -74,7 +73,7 @@ static void __init do_one_pass(u64 pattern, u64 start, u64 end) | |||
74 | u64 size = 0; | 73 | u64 size = 0; |
75 | 74 | ||
76 | while (start < end) { | 75 | while (start < end) { |
77 | start = find_e820_area_size(start, &size, 1); | 76 | start = memblock_x86_find_in_range_size(start, &size, 1); |
78 | 77 | ||
79 | /* done ? */ | 78 | /* done ? */ |
80 | if (start >= end) | 79 | if (start >= end) |
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c index 809baaaf48b1..84a3e4c9f277 100644 --- a/arch/x86/mm/numa_32.c +++ b/arch/x86/mm/numa_32.c | |||
@@ -24,6 +24,7 @@ | |||
24 | 24 | ||
25 | #include <linux/mm.h> | 25 | #include <linux/mm.h> |
26 | #include <linux/bootmem.h> | 26 | #include <linux/bootmem.h> |
27 | #include <linux/memblock.h> | ||
27 | #include <linux/mmzone.h> | 28 | #include <linux/mmzone.h> |
28 | #include <linux/highmem.h> | 29 | #include <linux/highmem.h> |
29 | #include <linux/initrd.h> | 30 | #include <linux/initrd.h> |
@@ -120,7 +121,7 @@ int __init get_memcfg_numa_flat(void) | |||
120 | 121 | ||
121 | node_start_pfn[0] = 0; | 122 | node_start_pfn[0] = 0; |
122 | node_end_pfn[0] = max_pfn; | 123 | node_end_pfn[0] = max_pfn; |
123 | e820_register_active_regions(0, 0, max_pfn); | 124 | memblock_x86_register_active_regions(0, 0, max_pfn); |
124 | memory_present(0, 0, max_pfn); | 125 | memory_present(0, 0, max_pfn); |
125 | node_remap_size[0] = node_memmap_size_bytes(0, 0, max_pfn); | 126 | node_remap_size[0] = node_memmap_size_bytes(0, 0, max_pfn); |
126 | 127 | ||
@@ -161,14 +162,14 @@ static void __init allocate_pgdat(int nid) | |||
161 | NODE_DATA(nid) = (pg_data_t *)node_remap_start_vaddr[nid]; | 162 | NODE_DATA(nid) = (pg_data_t *)node_remap_start_vaddr[nid]; |
162 | else { | 163 | else { |
163 | unsigned long pgdat_phys; | 164 | unsigned long pgdat_phys; |
164 | pgdat_phys = find_e820_area(min_low_pfn<<PAGE_SHIFT, | 165 | pgdat_phys = memblock_find_in_range(min_low_pfn<<PAGE_SHIFT, |
165 | max_pfn_mapped<<PAGE_SHIFT, | 166 | max_pfn_mapped<<PAGE_SHIFT, |
166 | sizeof(pg_data_t), | 167 | sizeof(pg_data_t), |
167 | PAGE_SIZE); | 168 | PAGE_SIZE); |
168 | NODE_DATA(nid) = (pg_data_t *)(pfn_to_kaddr(pgdat_phys>>PAGE_SHIFT)); | 169 | NODE_DATA(nid) = (pg_data_t *)(pfn_to_kaddr(pgdat_phys>>PAGE_SHIFT)); |
169 | memset(buf, 0, sizeof(buf)); | 170 | memset(buf, 0, sizeof(buf)); |
170 | sprintf(buf, "NODE_DATA %d", nid); | 171 | sprintf(buf, "NODE_DATA %d", nid); |
171 | reserve_early(pgdat_phys, pgdat_phys + sizeof(pg_data_t), buf); | 172 | memblock_x86_reserve_range(pgdat_phys, pgdat_phys + sizeof(pg_data_t), buf); |
172 | } | 173 | } |
173 | printk(KERN_DEBUG "allocate_pgdat: node %d NODE_DATA %08lx\n", | 174 | printk(KERN_DEBUG "allocate_pgdat: node %d NODE_DATA %08lx\n", |
174 | nid, (unsigned long)NODE_DATA(nid)); | 175 | nid, (unsigned long)NODE_DATA(nid)); |
@@ -291,15 +292,15 @@ static __init unsigned long calculate_numa_remap_pages(void) | |||
291 | PTRS_PER_PTE); | 292 | PTRS_PER_PTE); |
292 | node_kva_target <<= PAGE_SHIFT; | 293 | node_kva_target <<= PAGE_SHIFT; |
293 | do { | 294 | do { |
294 | node_kva_final = find_e820_area(node_kva_target, | 295 | node_kva_final = memblock_find_in_range(node_kva_target, |
295 | ((u64)node_end_pfn[nid])<<PAGE_SHIFT, | 296 | ((u64)node_end_pfn[nid])<<PAGE_SHIFT, |
296 | ((u64)size)<<PAGE_SHIFT, | 297 | ((u64)size)<<PAGE_SHIFT, |
297 | LARGE_PAGE_BYTES); | 298 | LARGE_PAGE_BYTES); |
298 | node_kva_target -= LARGE_PAGE_BYTES; | 299 | node_kva_target -= LARGE_PAGE_BYTES; |
299 | } while (node_kva_final == -1ULL && | 300 | } while (node_kva_final == MEMBLOCK_ERROR && |
300 | (node_kva_target>>PAGE_SHIFT) > (node_start_pfn[nid])); | 301 | (node_kva_target>>PAGE_SHIFT) > (node_start_pfn[nid])); |
301 | 302 | ||
302 | if (node_kva_final == -1ULL) | 303 | if (node_kva_final == MEMBLOCK_ERROR) |
303 | panic("Can not get kva ram\n"); | 304 | panic("Can not get kva ram\n"); |
304 | 305 | ||
305 | node_remap_size[nid] = size; | 306 | node_remap_size[nid] = size; |
@@ -318,15 +319,13 @@ static __init unsigned long calculate_numa_remap_pages(void) | |||
318 | * but we could have some hole in high memory, and it will only | 319 | * but we could have some hole in high memory, and it will only |
319 | * check page_is_ram(pfn) && !page_is_reserved_early(pfn) to decide | 320 | * check page_is_ram(pfn) && !page_is_reserved_early(pfn) to decide |
320 | * to use it as free. | 321 | * to use it as free. |
321 | * So reserve_early here, hope we don't run out of that array | 322 | * So memblock_x86_reserve_range here, hope we don't run out of that array |
322 | */ | 323 | */ |
323 | reserve_early(node_kva_final, | 324 | memblock_x86_reserve_range(node_kva_final, |
324 | node_kva_final+(((u64)size)<<PAGE_SHIFT), | 325 | node_kva_final+(((u64)size)<<PAGE_SHIFT), |
325 | "KVA RAM"); | 326 | "KVA RAM"); |
326 | 327 | ||
327 | node_remap_start_pfn[nid] = node_kva_final>>PAGE_SHIFT; | 328 | node_remap_start_pfn[nid] = node_kva_final>>PAGE_SHIFT; |
328 | remove_active_range(nid, node_remap_start_pfn[nid], | ||
329 | node_remap_start_pfn[nid] + size); | ||
330 | } | 329 | } |
331 | printk(KERN_INFO "Reserving total of %lx pages for numa KVA remap\n", | 330 | printk(KERN_INFO "Reserving total of %lx pages for numa KVA remap\n", |
332 | reserve_pages); | 331 | reserve_pages); |
@@ -367,14 +366,14 @@ void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn, | |||
367 | 366 | ||
368 | kva_target_pfn = round_down(max_low_pfn - kva_pages, PTRS_PER_PTE); | 367 | kva_target_pfn = round_down(max_low_pfn - kva_pages, PTRS_PER_PTE); |
369 | do { | 368 | do { |
370 | kva_start_pfn = find_e820_area(kva_target_pfn<<PAGE_SHIFT, | 369 | kva_start_pfn = memblock_find_in_range(kva_target_pfn<<PAGE_SHIFT, |
371 | max_low_pfn<<PAGE_SHIFT, | 370 | max_low_pfn<<PAGE_SHIFT, |
372 | kva_pages<<PAGE_SHIFT, | 371 | kva_pages<<PAGE_SHIFT, |
373 | PTRS_PER_PTE<<PAGE_SHIFT) >> PAGE_SHIFT; | 372 | PTRS_PER_PTE<<PAGE_SHIFT) >> PAGE_SHIFT; |
374 | kva_target_pfn -= PTRS_PER_PTE; | 373 | kva_target_pfn -= PTRS_PER_PTE; |
375 | } while (kva_start_pfn == -1UL && kva_target_pfn > min_low_pfn); | 374 | } while (kva_start_pfn == MEMBLOCK_ERROR && kva_target_pfn > min_low_pfn); |
376 | 375 | ||
377 | if (kva_start_pfn == -1UL) | 376 | if (kva_start_pfn == MEMBLOCK_ERROR) |
378 | panic("Can not get kva space\n"); | 377 | panic("Can not get kva space\n"); |
379 | 378 | ||
380 | printk(KERN_INFO "kva_start_pfn ~ %lx max_low_pfn ~ %lx\n", | 379 | printk(KERN_INFO "kva_start_pfn ~ %lx max_low_pfn ~ %lx\n", |
@@ -382,7 +381,7 @@ void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn, | |||
382 | printk(KERN_INFO "max_pfn = %lx\n", max_pfn); | 381 | printk(KERN_INFO "max_pfn = %lx\n", max_pfn); |
383 | 382 | ||
384 | /* avoid clash with initrd */ | 383 | /* avoid clash with initrd */ |
385 | reserve_early(kva_start_pfn<<PAGE_SHIFT, | 384 | memblock_x86_reserve_range(kva_start_pfn<<PAGE_SHIFT, |
386 | (kva_start_pfn + kva_pages)<<PAGE_SHIFT, | 385 | (kva_start_pfn + kva_pages)<<PAGE_SHIFT, |
387 | "KVA PG"); | 386 | "KVA PG"); |
388 | #ifdef CONFIG_HIGHMEM | 387 | #ifdef CONFIG_HIGHMEM |
@@ -419,9 +418,6 @@ void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn, | |||
419 | for_each_online_node(nid) { | 418 | for_each_online_node(nid) { |
420 | memset(NODE_DATA(nid), 0, sizeof(struct pglist_data)); | 419 | memset(NODE_DATA(nid), 0, sizeof(struct pglist_data)); |
421 | NODE_DATA(nid)->node_id = nid; | 420 | NODE_DATA(nid)->node_id = nid; |
422 | #ifndef CONFIG_NO_BOOTMEM | ||
423 | NODE_DATA(nid)->bdata = &bootmem_node_data[nid]; | ||
424 | #endif | ||
425 | } | 421 | } |
426 | 422 | ||
427 | setup_bootmem_allocator(); | 423 | setup_bootmem_allocator(); |
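A minimal sketch of the descending-search idiom used in the two numa_32.c loops above: lower the target address until memblock_find_in_range() finds room or the floor is reached. The names are hypothetical and the step size is assumed equal to the alignment.

    #include <linux/memblock.h>

    /* Sketch: search downward from 'target' toward 'floor' for 'size' bytes. */
    static u64 __init my_find_below(u64 floor, u64 target, u64 limit,
                                    u64 size, u64 align)
    {
            u64 addr;

            do {
                    addr = memblock_find_in_range(target, limit, size, align);
                    target -= align;
            } while (addr == MEMBLOCK_ERROR && target > floor);

            return addr;    /* MEMBLOCK_ERROR if no fit was found */
    }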
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index 4962f1aeda6f..60f498511dd6 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c | |||
@@ -7,6 +7,7 @@ | |||
7 | #include <linux/string.h> | 7 | #include <linux/string.h> |
8 | #include <linux/init.h> | 8 | #include <linux/init.h> |
9 | #include <linux/bootmem.h> | 9 | #include <linux/bootmem.h> |
10 | #include <linux/memblock.h> | ||
10 | #include <linux/mmzone.h> | 11 | #include <linux/mmzone.h> |
11 | #include <linux/ctype.h> | 12 | #include <linux/ctype.h> |
12 | #include <linux/module.h> | 13 | #include <linux/module.h> |
@@ -86,16 +87,16 @@ static int __init allocate_cachealigned_memnodemap(void) | |||
86 | 87 | ||
87 | addr = 0x8000; | 88 | addr = 0x8000; |
88 | nodemap_size = roundup(sizeof(s16) * memnodemapsize, L1_CACHE_BYTES); | 89 | nodemap_size = roundup(sizeof(s16) * memnodemapsize, L1_CACHE_BYTES); |
89 | nodemap_addr = find_e820_area(addr, max_pfn<<PAGE_SHIFT, | 90 | nodemap_addr = memblock_find_in_range(addr, max_pfn<<PAGE_SHIFT, |
90 | nodemap_size, L1_CACHE_BYTES); | 91 | nodemap_size, L1_CACHE_BYTES); |
91 | if (nodemap_addr == -1UL) { | 92 | if (nodemap_addr == MEMBLOCK_ERROR) { |
92 | printk(KERN_ERR | 93 | printk(KERN_ERR |
93 | "NUMA: Unable to allocate Memory to Node hash map\n"); | 94 | "NUMA: Unable to allocate Memory to Node hash map\n"); |
94 | nodemap_addr = nodemap_size = 0; | 95 | nodemap_addr = nodemap_size = 0; |
95 | return -1; | 96 | return -1; |
96 | } | 97 | } |
97 | memnodemap = phys_to_virt(nodemap_addr); | 98 | memnodemap = phys_to_virt(nodemap_addr); |
98 | reserve_early(nodemap_addr, nodemap_addr + nodemap_size, "MEMNODEMAP"); | 99 | memblock_x86_reserve_range(nodemap_addr, nodemap_addr + nodemap_size, "MEMNODEMAP"); |
99 | 100 | ||
100 | printk(KERN_DEBUG "NUMA: Allocated memnodemap from %lx - %lx\n", | 101 | printk(KERN_DEBUG "NUMA: Allocated memnodemap from %lx - %lx\n", |
101 | nodemap_addr, nodemap_addr + nodemap_size); | 102 | nodemap_addr, nodemap_addr + nodemap_size); |
@@ -171,8 +172,8 @@ static void * __init early_node_mem(int nodeid, unsigned long start, | |||
171 | if (start < (MAX_DMA32_PFN<<PAGE_SHIFT) && | 172 | if (start < (MAX_DMA32_PFN<<PAGE_SHIFT) && |
172 | end > (MAX_DMA32_PFN<<PAGE_SHIFT)) | 173 | end > (MAX_DMA32_PFN<<PAGE_SHIFT)) |
173 | start = MAX_DMA32_PFN<<PAGE_SHIFT; | 174 | start = MAX_DMA32_PFN<<PAGE_SHIFT; |
174 | mem = find_e820_area(start, end, size, align); | 175 | mem = memblock_x86_find_in_range_node(nodeid, start, end, size, align); |
175 | if (mem != -1L) | 176 | if (mem != MEMBLOCK_ERROR) |
176 | return __va(mem); | 177 | return __va(mem); |
177 | 178 | ||
178 | /* extend the search scope */ | 179 | /* extend the search scope */ |
@@ -181,8 +182,8 @@ static void * __init early_node_mem(int nodeid, unsigned long start, | |||
181 | start = MAX_DMA32_PFN<<PAGE_SHIFT; | 182 | start = MAX_DMA32_PFN<<PAGE_SHIFT; |
182 | else | 183 | else |
183 | start = MAX_DMA_PFN<<PAGE_SHIFT; | 184 | start = MAX_DMA_PFN<<PAGE_SHIFT; |
184 | mem = find_e820_area(start, end, size, align); | 185 | mem = memblock_x86_find_in_range_node(nodeid, start, end, size, align); |
185 | if (mem != -1L) | 186 | if (mem != MEMBLOCK_ERROR) |
186 | return __va(mem); | 187 | return __va(mem); |
187 | 188 | ||
188 | printk(KERN_ERR "Cannot find %lu bytes in node %d\n", | 189 | printk(KERN_ERR "Cannot find %lu bytes in node %d\n", |
@@ -198,10 +199,6 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end) | |||
198 | unsigned long start_pfn, last_pfn, nodedata_phys; | 199 | unsigned long start_pfn, last_pfn, nodedata_phys; |
199 | const int pgdat_size = roundup(sizeof(pg_data_t), PAGE_SIZE); | 200 | const int pgdat_size = roundup(sizeof(pg_data_t), PAGE_SIZE); |
200 | int nid; | 201 | int nid; |
201 | #ifndef CONFIG_NO_BOOTMEM | ||
202 | unsigned long bootmap_start, bootmap_pages, bootmap_size; | ||
203 | void *bootmap; | ||
204 | #endif | ||
205 | 202 | ||
206 | if (!end) | 203 | if (!end) |
207 | return; | 204 | return; |
@@ -226,7 +223,7 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end) | |||
226 | if (node_data[nodeid] == NULL) | 223 | if (node_data[nodeid] == NULL) |
227 | return; | 224 | return; |
228 | nodedata_phys = __pa(node_data[nodeid]); | 225 | nodedata_phys = __pa(node_data[nodeid]); |
229 | reserve_early(nodedata_phys, nodedata_phys + pgdat_size, "NODE_DATA"); | 226 | memblock_x86_reserve_range(nodedata_phys, nodedata_phys + pgdat_size, "NODE_DATA"); |
230 | printk(KERN_INFO " NODE_DATA [%016lx - %016lx]\n", nodedata_phys, | 227 | printk(KERN_INFO " NODE_DATA [%016lx - %016lx]\n", nodedata_phys, |
231 | nodedata_phys + pgdat_size - 1); | 228 | nodedata_phys + pgdat_size - 1); |
232 | nid = phys_to_nid(nodedata_phys); | 229 | nid = phys_to_nid(nodedata_phys); |
@@ -238,47 +235,6 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end) | |||
238 | NODE_DATA(nodeid)->node_start_pfn = start_pfn; | 235 | NODE_DATA(nodeid)->node_start_pfn = start_pfn; |
239 | NODE_DATA(nodeid)->node_spanned_pages = last_pfn - start_pfn; | 236 | NODE_DATA(nodeid)->node_spanned_pages = last_pfn - start_pfn; |
240 | 237 | ||
241 | #ifndef CONFIG_NO_BOOTMEM | ||
242 | NODE_DATA(nodeid)->bdata = &bootmem_node_data[nodeid]; | ||
243 | |||
244 | /* | ||
245 | * Find a place for the bootmem map | ||
246 | * nodedata_phys could be on other nodes by alloc_bootmem, | ||
247 | * so need to sure bootmap_start not to be small, otherwise | ||
248 | * early_node_mem will get that with find_e820_area instead | ||
249 | * of alloc_bootmem, that could clash with reserved range | ||
250 | */ | ||
251 | bootmap_pages = bootmem_bootmap_pages(last_pfn - start_pfn); | ||
252 | bootmap_start = roundup(nodedata_phys + pgdat_size, PAGE_SIZE); | ||
253 | /* | ||
254 | * SMP_CACHE_BYTES could be enough, but init_bootmem_node like | ||
255 | * to use that to align to PAGE_SIZE | ||
256 | */ | ||
257 | bootmap = early_node_mem(nodeid, bootmap_start, end, | ||
258 | bootmap_pages<<PAGE_SHIFT, PAGE_SIZE); | ||
259 | if (bootmap == NULL) { | ||
260 | free_early(nodedata_phys, nodedata_phys + pgdat_size); | ||
261 | node_data[nodeid] = NULL; | ||
262 | return; | ||
263 | } | ||
264 | bootmap_start = __pa(bootmap); | ||
265 | reserve_early(bootmap_start, bootmap_start+(bootmap_pages<<PAGE_SHIFT), | ||
266 | "BOOTMAP"); | ||
267 | |||
268 | bootmap_size = init_bootmem_node(NODE_DATA(nodeid), | ||
269 | bootmap_start >> PAGE_SHIFT, | ||
270 | start_pfn, last_pfn); | ||
271 | |||
272 | printk(KERN_INFO " bootmap [%016lx - %016lx] pages %lx\n", | ||
273 | bootmap_start, bootmap_start + bootmap_size - 1, | ||
274 | bootmap_pages); | ||
275 | nid = phys_to_nid(bootmap_start); | ||
276 | if (nid != nodeid) | ||
277 | printk(KERN_INFO " bootmap(%d) on node %d\n", nodeid, nid); | ||
278 | |||
279 | free_bootmem_with_active_regions(nodeid, end); | ||
280 | #endif | ||
281 | |||
282 | node_set_online(nodeid); | 238 | node_set_online(nodeid); |
283 | } | 239 | } |
284 | 240 | ||
@@ -416,7 +372,7 @@ static int __init split_nodes_interleave(u64 addr, u64 max_addr, | |||
416 | nr_nodes = MAX_NUMNODES; | 372 | nr_nodes = MAX_NUMNODES; |
417 | } | 373 | } |
418 | 374 | ||
419 | size = (max_addr - addr - e820_hole_size(addr, max_addr)) / nr_nodes; | 375 | size = (max_addr - addr - memblock_x86_hole_size(addr, max_addr)) / nr_nodes; |
420 | /* | 376 | /* |
421 | * Calculate the number of big nodes that can be allocated as a result | 377 | * Calculate the number of big nodes that can be allocated as a result |
422 | * of consolidating the remainder. | 378 | * of consolidating the remainder. |
@@ -452,7 +408,7 @@ static int __init split_nodes_interleave(u64 addr, u64 max_addr, | |||
452 | * non-reserved memory is less than the per-node size. | 408 | * non-reserved memory is less than the per-node size. |
453 | */ | 409 | */ |
454 | while (end - physnodes[i].start - | 410 | while (end - physnodes[i].start - |
455 | e820_hole_size(physnodes[i].start, end) < size) { | 411 | memblock_x86_hole_size(physnodes[i].start, end) < size) { |
456 | end += FAKE_NODE_MIN_SIZE; | 412 | end += FAKE_NODE_MIN_SIZE; |
457 | if (end > physnodes[i].end) { | 413 | if (end > physnodes[i].end) { |
458 | end = physnodes[i].end; | 414 | end = physnodes[i].end; |
@@ -466,7 +422,7 @@ static int __init split_nodes_interleave(u64 addr, u64 max_addr, | |||
466 | * this one must extend to the boundary. | 422 | * this one must extend to the boundary. |
467 | */ | 423 | */ |
468 | if (end < dma32_end && dma32_end - end - | 424 | if (end < dma32_end && dma32_end - end - |
469 | e820_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE) | 425 | memblock_x86_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE) |
470 | end = dma32_end; | 426 | end = dma32_end; |
471 | 427 | ||
472 | /* | 428 | /* |
@@ -475,7 +431,7 @@ static int __init split_nodes_interleave(u64 addr, u64 max_addr, | |||
475 | * physical node. | 431 | * physical node. |
476 | */ | 432 | */ |
477 | if (physnodes[i].end - end - | 433 | if (physnodes[i].end - end - |
478 | e820_hole_size(end, physnodes[i].end) < size) | 434 | memblock_x86_hole_size(end, physnodes[i].end) < size) |
479 | end = physnodes[i].end; | 435 | end = physnodes[i].end; |
480 | 436 | ||
481 | /* | 437 | /* |
@@ -503,7 +459,7 @@ static u64 __init find_end_of_node(u64 start, u64 max_addr, u64 size) | |||
503 | { | 459 | { |
504 | u64 end = start + size; | 460 | u64 end = start + size; |
505 | 461 | ||
506 | while (end - start - e820_hole_size(start, end) < size) { | 462 | while (end - start - memblock_x86_hole_size(start, end) < size) { |
507 | end += FAKE_NODE_MIN_SIZE; | 463 | end += FAKE_NODE_MIN_SIZE; |
508 | if (end > max_addr) { | 464 | if (end > max_addr) { |
509 | end = max_addr; | 465 | end = max_addr; |
@@ -532,7 +488,7 @@ static int __init split_nodes_size_interleave(u64 addr, u64 max_addr, u64 size) | |||
532 | * creates a uniform distribution of node sizes across the entire | 488 | * creates a uniform distribution of node sizes across the entire |
533 | * machine (but not necessarily over physical nodes). | 489 | * machine (but not necessarily over physical nodes). |
534 | */ | 490 | */ |
535 | min_size = (max_addr - addr - e820_hole_size(addr, max_addr)) / | 491 | min_size = (max_addr - addr - memblock_x86_hole_size(addr, max_addr)) / |
536 | MAX_NUMNODES; | 492 | MAX_NUMNODES; |
537 | min_size = max(min_size, FAKE_NODE_MIN_SIZE); | 493 | min_size = max(min_size, FAKE_NODE_MIN_SIZE); |
538 | if ((min_size & FAKE_NODE_MIN_HASH_MASK) < min_size) | 494 | if ((min_size & FAKE_NODE_MIN_HASH_MASK) < min_size) |
@@ -565,7 +521,7 @@ static int __init split_nodes_size_interleave(u64 addr, u64 max_addr, u64 size) | |||
565 | * this one must extend to the boundary. | 521 | * this one must extend to the boundary. |
566 | */ | 522 | */ |
567 | if (end < dma32_end && dma32_end - end - | 523 | if (end < dma32_end && dma32_end - end - |
568 | e820_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE) | 524 | memblock_x86_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE) |
569 | end = dma32_end; | 525 | end = dma32_end; |
570 | 526 | ||
571 | /* | 527 | /* |
@@ -574,7 +530,7 @@ static int __init split_nodes_size_interleave(u64 addr, u64 max_addr, u64 size) | |||
574 | * physical node. | 530 | * physical node. |
575 | */ | 531 | */ |
576 | if (physnodes[i].end - end - | 532 | if (physnodes[i].end - end - |
577 | e820_hole_size(end, physnodes[i].end) < size) | 533 | memblock_x86_hole_size(end, physnodes[i].end) < size) |
578 | end = physnodes[i].end; | 534 | end = physnodes[i].end; |
579 | 535 | ||
580 | /* | 536 | /* |
@@ -638,7 +594,7 @@ static int __init numa_emulation(unsigned long start_pfn, | |||
638 | */ | 594 | */ |
639 | remove_all_active_ranges(); | 595 | remove_all_active_ranges(); |
640 | for_each_node_mask(i, node_possible_map) { | 596 | for_each_node_mask(i, node_possible_map) { |
641 | e820_register_active_regions(i, nodes[i].start >> PAGE_SHIFT, | 597 | memblock_x86_register_active_regions(i, nodes[i].start >> PAGE_SHIFT, |
642 | nodes[i].end >> PAGE_SHIFT); | 598 | nodes[i].end >> PAGE_SHIFT); |
643 | setup_node_bootmem(i, nodes[i].start, nodes[i].end); | 599 | setup_node_bootmem(i, nodes[i].start, nodes[i].end); |
644 | } | 600 | } |
@@ -691,7 +647,7 @@ void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn, | |||
691 | node_set(0, node_possible_map); | 647 | node_set(0, node_possible_map); |
692 | for (i = 0; i < nr_cpu_ids; i++) | 648 | for (i = 0; i < nr_cpu_ids; i++) |
693 | numa_set_node(i, 0); | 649 | numa_set_node(i, 0); |
694 | e820_register_active_regions(0, start_pfn, last_pfn); | 650 | memblock_x86_register_active_regions(0, start_pfn, last_pfn); |
695 | setup_node_bootmem(0, start_pfn << PAGE_SHIFT, last_pfn << PAGE_SHIFT); | 651 | setup_node_bootmem(0, start_pfn << PAGE_SHIFT, last_pfn << PAGE_SHIFT); |
696 | } | 652 | } |
697 | 653 | ||
@@ -703,9 +659,7 @@ unsigned long __init numa_free_all_bootmem(void) | |||
703 | for_each_online_node(i) | 659 | for_each_online_node(i) |
704 | pages += free_all_bootmem_node(NODE_DATA(i)); | 660 | pages += free_all_bootmem_node(NODE_DATA(i)); |
705 | 661 | ||
706 | #ifdef CONFIG_NO_BOOTMEM | ||
707 | pages += free_all_memory_core_early(MAX_NUMNODES); | 662 | pages += free_all_memory_core_early(MAX_NUMNODES); |
708 | #endif | ||
709 | 663 | ||
710 | return pages; | 664 | return pages; |
711 | } | 665 | } |
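A minimal sketch of the node-aware allocate-then-reserve pattern that early_node_mem() and setup_node_bootmem() follow above; my_node_alloc() and its label string are hypothetical.

    #include <linux/memblock.h>

    /* Sketch: allocate 'size' bytes near node 'nid' within [start, end). */
    static void * __init my_node_alloc(int nid, u64 start, u64 end, u64 size)
    {
            u64 mem = memblock_x86_find_in_range_node(nid, start, end,
                                                      size, PAGE_SIZE);

            if (mem == MEMBLOCK_ERROR)
                    return NULL;

            memblock_x86_reserve_range(mem, mem + size, "MY_NODE_ALLOC");
            return __va(mem);
    }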
diff --git a/arch/x86/mm/srat_32.c b/arch/x86/mm/srat_32.c index 9324f13492d5..a17dffd136c1 100644 --- a/arch/x86/mm/srat_32.c +++ b/arch/x86/mm/srat_32.c | |||
@@ -25,6 +25,7 @@ | |||
25 | */ | 25 | */ |
26 | #include <linux/mm.h> | 26 | #include <linux/mm.h> |
27 | #include <linux/bootmem.h> | 27 | #include <linux/bootmem.h> |
28 | #include <linux/memblock.h> | ||
28 | #include <linux/mmzone.h> | 29 | #include <linux/mmzone.h> |
29 | #include <linux/acpi.h> | 30 | #include <linux/acpi.h> |
30 | #include <linux/nodemask.h> | 31 | #include <linux/nodemask.h> |
@@ -264,7 +265,7 @@ int __init get_memcfg_from_srat(void) | |||
264 | if (node_read_chunk(chunk->nid, chunk)) | 265 | if (node_read_chunk(chunk->nid, chunk)) |
265 | continue; | 266 | continue; |
266 | 267 | ||
267 | e820_register_active_regions(chunk->nid, chunk->start_pfn, | 268 | memblock_x86_register_active_regions(chunk->nid, chunk->start_pfn, |
268 | min(chunk->end_pfn, max_pfn)); | 269 | min(chunk->end_pfn, max_pfn)); |
269 | } | 270 | } |
270 | /* for out of order entries in SRAT */ | 271 | /* for out of order entries in SRAT */ |
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index 9c0d0d399c30..a35cb9d8b060 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c | |||
@@ -16,6 +16,7 @@ | |||
16 | #include <linux/module.h> | 16 | #include <linux/module.h> |
17 | #include <linux/topology.h> | 17 | #include <linux/topology.h> |
18 | #include <linux/bootmem.h> | 18 | #include <linux/bootmem.h> |
19 | #include <linux/memblock.h> | ||
19 | #include <linux/mm.h> | 20 | #include <linux/mm.h> |
20 | #include <asm/proto.h> | 21 | #include <asm/proto.h> |
21 | #include <asm/numa.h> | 22 | #include <asm/numa.h> |
@@ -98,15 +99,15 @@ void __init acpi_numa_slit_init(struct acpi_table_slit *slit) | |||
98 | unsigned long phys; | 99 | unsigned long phys; |
99 | 100 | ||
100 | length = slit->header.length; | 101 | length = slit->header.length; |
101 | phys = find_e820_area(0, max_pfn_mapped<<PAGE_SHIFT, length, | 102 | phys = memblock_find_in_range(0, max_pfn_mapped<<PAGE_SHIFT, length, |
102 | PAGE_SIZE); | 103 | PAGE_SIZE); |
103 | 104 | ||
104 | if (phys == -1L) | 105 | if (phys == MEMBLOCK_ERROR) |
105 | panic(" Can not save slit!\n"); | 106 | panic(" Can not save slit!\n"); |
106 | 107 | ||
107 | acpi_slit = __va(phys); | 108 | acpi_slit = __va(phys); |
108 | memcpy(acpi_slit, slit, length); | 109 | memcpy(acpi_slit, slit, length); |
109 | reserve_early(phys, phys + length, "ACPI SLIT"); | 110 | memblock_x86_reserve_range(phys, phys + length, "ACPI SLIT"); |
110 | } | 111 | } |
111 | 112 | ||
112 | /* Callback for Proximity Domain -> x2APIC mapping */ | 113 | /* Callback for Proximity Domain -> x2APIC mapping */ |
@@ -324,7 +325,7 @@ static int __init nodes_cover_memory(const struct bootnode *nodes) | |||
324 | pxmram = 0; | 325 | pxmram = 0; |
325 | } | 326 | } |
326 | 327 | ||
327 | e820ram = max_pfn - (e820_hole_size(0, max_pfn<<PAGE_SHIFT)>>PAGE_SHIFT); | 328 | e820ram = max_pfn - (memblock_x86_hole_size(0, max_pfn<<PAGE_SHIFT)>>PAGE_SHIFT); |
328 | /* We seem to lose 3 pages somewhere. Allow 1M of slack. */ | 329 | /* We seem to lose 3 pages somewhere. Allow 1M of slack. */ |
329 | if ((long)(e820ram - pxmram) >= (1<<(20 - PAGE_SHIFT))) { | 330 | if ((long)(e820ram - pxmram) >= (1<<(20 - PAGE_SHIFT))) { |
330 | printk(KERN_ERR | 331 | printk(KERN_ERR |
@@ -421,7 +422,7 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end) | |||
421 | } | 422 | } |
422 | 423 | ||
423 | for (i = 0; i < num_node_memblks; i++) | 424 | for (i = 0; i < num_node_memblks; i++) |
424 | e820_register_active_regions(memblk_nodeid[i], | 425 | memblock_x86_register_active_regions(memblk_nodeid[i], |
425 | node_memblk_range[i].start >> PAGE_SHIFT, | 426 | node_memblk_range[i].start >> PAGE_SHIFT, |
426 | node_memblk_range[i].end >> PAGE_SHIFT); | 427 | node_memblk_range[i].end >> PAGE_SHIFT); |
427 | 428 | ||
diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index 8d17db266bbf..a011bcc0f943 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c | |||
@@ -322,29 +322,25 @@ static inline int eilvt_is_available(int offset) | |||
322 | 322 | ||
323 | static inline int ibs_eilvt_valid(void) | 323 | static inline int ibs_eilvt_valid(void) |
324 | { | 324 | { |
325 | u64 val; | ||
326 | int offset; | 325 | int offset; |
326 | u64 val; | ||
327 | 327 | ||
328 | rdmsrl(MSR_AMD64_IBSCTL, val); | 328 | rdmsrl(MSR_AMD64_IBSCTL, val); |
329 | offset = val & IBSCTL_LVT_OFFSET_MASK; | ||
330 | |||
329 | if (!(val & IBSCTL_LVT_OFFSET_VALID)) { | 331 | if (!(val & IBSCTL_LVT_OFFSET_VALID)) { |
330 | pr_err(FW_BUG "cpu %d, invalid IBS " | 332 | pr_err(FW_BUG "cpu %d, invalid IBS interrupt offset %d (MSR%08X=0x%016llx)\n", |
331 | "interrupt offset %d (MSR%08X=0x%016llx)", | 333 | smp_processor_id(), offset, MSR_AMD64_IBSCTL, val); |
332 | smp_processor_id(), offset, | ||
333 | MSR_AMD64_IBSCTL, val); | ||
334 | return 0; | 334 | return 0; |
335 | } | 335 | } |
336 | 336 | ||
337 | offset = val & IBSCTL_LVT_OFFSET_MASK; | 337 | if (!eilvt_is_available(offset)) { |
338 | 338 | pr_err(FW_BUG "cpu %d, IBS interrupt offset %d not available (MSR%08X=0x%016llx)\n", | |
339 | if (eilvt_is_available(offset)) | 339 | smp_processor_id(), offset, MSR_AMD64_IBSCTL, val); |
340 | return !0; | 340 | return 0; |
341 | 341 | } | |
342 | pr_err(FW_BUG "cpu %d, IBS interrupt offset %d " | ||
343 | "not available (MSR%08X=0x%016llx)", | ||
344 | smp_processor_id(), offset, | ||
345 | MSR_AMD64_IBSCTL, val); | ||
346 | 342 | ||
347 | return 0; | 343 | return 1; |
348 | } | 344 | } |
349 | 345 | ||
350 | static inline int get_ibs_offset(void) | 346 | static inline int get_ibs_offset(void) |
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 7d46c8441418..63b83ceebd1a 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c | |||
@@ -30,6 +30,7 @@ | |||
30 | #include <linux/console.h> | 30 | #include <linux/console.h> |
31 | #include <linux/pci.h> | 31 | #include <linux/pci.h> |
32 | #include <linux/gfp.h> | 32 | #include <linux/gfp.h> |
33 | #include <linux/memblock.h> | ||
33 | 34 | ||
34 | #include <xen/xen.h> | 35 | #include <xen/xen.h> |
35 | #include <xen/interface/xen.h> | 36 | #include <xen/interface/xen.h> |
@@ -1183,6 +1184,8 @@ asmlinkage void __init xen_start_kernel(void) | |||
1183 | local_irq_disable(); | 1184 | local_irq_disable(); |
1184 | early_boot_irqs_off(); | 1185 | early_boot_irqs_off(); |
1185 | 1186 | ||
1187 | memblock_init(); | ||
1188 | |||
1186 | xen_raw_console_write("mapping kernel into physical memory\n"); | 1189 | xen_raw_console_write("mapping kernel into physical memory\n"); |
1187 | pgd = xen_setup_kernel_pagetable(pgd, xen_start_info->nr_pages); | 1190 | pgd = xen_setup_kernel_pagetable(pgd, xen_start_info->nr_pages); |
1188 | 1191 | ||
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index b2363fcbcd0f..f72d18c69221 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c | |||
@@ -45,6 +45,7 @@ | |||
45 | #include <linux/vmalloc.h> | 45 | #include <linux/vmalloc.h> |
46 | #include <linux/module.h> | 46 | #include <linux/module.h> |
47 | #include <linux/gfp.h> | 47 | #include <linux/gfp.h> |
48 | #include <linux/memblock.h> | ||
48 | 49 | ||
49 | #include <asm/pgtable.h> | 50 | #include <asm/pgtable.h> |
50 | #include <asm/tlbflush.h> | 51 | #include <asm/tlbflush.h> |
@@ -55,6 +56,7 @@ | |||
55 | #include <asm/e820.h> | 56 | #include <asm/e820.h> |
56 | #include <asm/linkage.h> | 57 | #include <asm/linkage.h> |
57 | #include <asm/page.h> | 58 | #include <asm/page.h> |
59 | #include <asm/init.h> | ||
58 | 60 | ||
59 | #include <asm/xen/hypercall.h> | 61 | #include <asm/xen/hypercall.h> |
60 | #include <asm/xen/hypervisor.h> | 62 | #include <asm/xen/hypervisor.h> |
@@ -359,7 +361,8 @@ void make_lowmem_page_readonly(void *vaddr) | |||
359 | unsigned int level; | 361 | unsigned int level; |
360 | 362 | ||
361 | pte = lookup_address(address, &level); | 363 | pte = lookup_address(address, &level); |
362 | BUG_ON(pte == NULL); | 364 | if (pte == NULL) |
365 | return; /* vaddr missing */ | ||
363 | 366 | ||
364 | ptev = pte_wrprotect(*pte); | 367 | ptev = pte_wrprotect(*pte); |
365 | 368 | ||
@@ -374,7 +377,8 @@ void make_lowmem_page_readwrite(void *vaddr) | |||
374 | unsigned int level; | 377 | unsigned int level; |
375 | 378 | ||
376 | pte = lookup_address(address, &level); | 379 | pte = lookup_address(address, &level); |
377 | BUG_ON(pte == NULL); | 380 | if (pte == NULL) |
381 | return; /* vaddr missing */ | ||
378 | 382 | ||
379 | ptev = pte_mkwrite(*pte); | 383 | ptev = pte_mkwrite(*pte); |
380 | 384 | ||
@@ -1508,13 +1512,25 @@ static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd) | |||
1508 | #endif | 1512 | #endif |
1509 | } | 1513 | } |
1510 | 1514 | ||
1511 | #ifdef CONFIG_X86_32 | ||
1512 | static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte) | 1515 | static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte) |
1513 | { | 1516 | { |
1517 | unsigned long pfn = pte_pfn(pte); | ||
1518 | |||
1519 | #ifdef CONFIG_X86_32 | ||
1514 | /* If there's an existing pte, then don't allow _PAGE_RW to be set */ | 1520 | /* If there's an existing pte, then don't allow _PAGE_RW to be set */ |
1515 | if (pte_val_ma(*ptep) & _PAGE_PRESENT) | 1521 | if (pte_val_ma(*ptep) & _PAGE_PRESENT) |
1516 | pte = __pte_ma(((pte_val_ma(*ptep) & _PAGE_RW) | ~_PAGE_RW) & | 1522 | pte = __pte_ma(((pte_val_ma(*ptep) & _PAGE_RW) | ~_PAGE_RW) & |
1517 | pte_val_ma(pte)); | 1523 | pte_val_ma(pte)); |
1524 | #endif | ||
1525 | |||
1526 | /* | ||
1527 | * If the new pfn is within the range of the newly allocated | ||
1528 | * kernel pagetable, and it isn't being mapped into an | ||
1529 | * early_ioremap fixmap slot, make sure it is RO. | ||
1530 | */ | ||
1531 | if (!is_early_ioremap_ptep(ptep) && | ||
1532 | pfn >= e820_table_start && pfn < e820_table_end) | ||
1533 | pte = pte_wrprotect(pte); | ||
1518 | 1534 | ||
1519 | return pte; | 1535 | return pte; |
1520 | } | 1536 | } |
@@ -1527,7 +1543,6 @@ static __init void xen_set_pte_init(pte_t *ptep, pte_t pte) | |||
1527 | 1543 | ||
1528 | xen_set_pte(ptep, pte); | 1544 | xen_set_pte(ptep, pte); |
1529 | } | 1545 | } |
1530 | #endif | ||
1531 | 1546 | ||
1532 | static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn) | 1547 | static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn) |
1533 | { | 1548 | { |
@@ -1814,7 +1829,7 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, | |||
1814 | __xen_write_cr3(true, __pa(pgd)); | 1829 | __xen_write_cr3(true, __pa(pgd)); |
1815 | xen_mc_issue(PARAVIRT_LAZY_CPU); | 1830 | xen_mc_issue(PARAVIRT_LAZY_CPU); |
1816 | 1831 | ||
1817 | reserve_early(__pa(xen_start_info->pt_base), | 1832 | memblock_x86_reserve_range(__pa(xen_start_info->pt_base), |
1818 | __pa(xen_start_info->pt_base + | 1833 | __pa(xen_start_info->pt_base + |
1819 | xen_start_info->nr_pt_frames * PAGE_SIZE), | 1834 | xen_start_info->nr_pt_frames * PAGE_SIZE), |
1820 | "XEN PAGETABLES"); | 1835 | "XEN PAGETABLES"); |
@@ -1852,7 +1867,7 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, | |||
1852 | 1867 | ||
1853 | pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(swapper_pg_dir))); | 1868 | pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(swapper_pg_dir))); |
1854 | 1869 | ||
1855 | reserve_early(__pa(xen_start_info->pt_base), | 1870 | memblock_x86_reserve_range(__pa(xen_start_info->pt_base), |
1856 | __pa(xen_start_info->pt_base + | 1871 | __pa(xen_start_info->pt_base + |
1857 | xen_start_info->nr_pt_frames * PAGE_SIZE), | 1872 | xen_start_info->nr_pt_frames * PAGE_SIZE), |
1858 | "XEN PAGETABLES"); | 1873 | "XEN PAGETABLES"); |
@@ -1971,11 +1986,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = { | |||
1971 | .alloc_pmd = xen_alloc_pmd_init, | 1986 | .alloc_pmd = xen_alloc_pmd_init, |
1972 | .release_pmd = xen_release_pmd_init, | 1987 | .release_pmd = xen_release_pmd_init, |
1973 | 1988 | ||
1974 | #ifdef CONFIG_X86_64 | ||
1975 | .set_pte = xen_set_pte, | ||
1976 | #else | ||
1977 | .set_pte = xen_set_pte_init, | 1989 | .set_pte = xen_set_pte_init, |
1978 | #endif | ||
1979 | .set_pte_at = xen_set_pte_at, | 1990 | .set_pte_at = xen_set_pte_at, |
1980 | .set_pmd = xen_set_pmd_hyper, | 1991 | .set_pmd = xen_set_pmd_hyper, |
1981 | 1992 | ||
diff --git a/arch/x86/xen/pci-swiotlb-xen.c b/arch/x86/xen/pci-swiotlb-xen.c index a013ec9d0c54..22471001b74c 100644 --- a/arch/x86/xen/pci-swiotlb-xen.c +++ b/arch/x86/xen/pci-swiotlb-xen.c | |||
@@ -5,6 +5,7 @@ | |||
5 | 5 | ||
6 | #include <asm/xen/hypervisor.h> | 6 | #include <asm/xen/hypervisor.h> |
7 | #include <xen/xen.h> | 7 | #include <xen/xen.h> |
8 | #include <asm/iommu_table.h> | ||
8 | 9 | ||
9 | int xen_swiotlb __read_mostly; | 10 | int xen_swiotlb __read_mostly; |
10 | 11 | ||
@@ -56,3 +57,7 @@ void __init pci_xen_swiotlb_init(void) | |||
56 | dma_ops = &xen_swiotlb_dma_ops; | 57 | dma_ops = &xen_swiotlb_dma_ops; |
57 | } | 58 | } |
58 | } | 59 | } |
60 | IOMMU_INIT_FINISH(pci_xen_swiotlb_detect, | ||
61 | 0, | ||
62 | pci_xen_swiotlb_init, | ||
63 | 0); | ||
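A hedged sketch of the IOMMU_INIT_FINISH() registration pattern the hunk above adopts; the my_iommu_* names are invented for illustration, and the detect return convention is an assumption, not taken from this patch.

    #include <asm/iommu_table.h>

    static int __init my_iommu_detect(void)
    {
            return 0;       /* assumed convention: 0 when the hardware is absent */
    }

    static void __init my_iommu_init(void)
    {
            /* e.g. install dma_ops for the detected hardware */
    }

    /* Arguments: detect, dependency, init, late_init (mirroring the call above). */
    IOMMU_INIT_FINISH(my_iommu_detect, 0, my_iommu_init, 0);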
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 328b00305426..9729c903404b 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c | |||
@@ -8,6 +8,7 @@ | |||
8 | #include <linux/sched.h> | 8 | #include <linux/sched.h> |
9 | #include <linux/mm.h> | 9 | #include <linux/mm.h> |
10 | #include <linux/pm.h> | 10 | #include <linux/pm.h> |
11 | #include <linux/memblock.h> | ||
11 | 12 | ||
12 | #include <asm/elf.h> | 13 | #include <asm/elf.h> |
13 | #include <asm/vdso.h> | 14 | #include <asm/vdso.h> |
@@ -129,7 +130,7 @@ char * __init xen_memory_setup(void) | |||
129 | * - xen_start_info | 130 | * - xen_start_info |
130 | * See comment above "struct start_info" in <xen/interface/xen.h> | 131 | * See comment above "struct start_info" in <xen/interface/xen.h> |
131 | */ | 132 | */ |
132 | reserve_early(__pa(xen_start_info->mfn_list), | 133 | memblock_x86_reserve_range(__pa(xen_start_info->mfn_list), |
133 | __pa(xen_start_info->pt_base), | 134 | __pa(xen_start_info->pt_base), |
134 | "XEN START INFO"); | 135 | "XEN START INFO"); |
135 | 136 | ||
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c index e0500646585d..23e061b9327b 100644 --- a/arch/x86/xen/spinlock.c +++ b/arch/x86/xen/spinlock.c | |||
@@ -224,7 +224,7 @@ static noinline int xen_spin_lock_slow(struct arch_spinlock *lock, bool irq_enab | |||
224 | goto out; | 224 | goto out; |
225 | } | 225 | } |
226 | 226 | ||
227 | flags = __raw_local_save_flags(); | 227 | flags = arch_local_save_flags(); |
228 | if (irq_enable) { | 228 | if (irq_enable) { |
229 | ADD_STATS(taken_slow_irqenable, 1); | 229 | ADD_STATS(taken_slow_irqenable, 1); |
230 | raw_local_irq_enable(); | 230 | raw_local_irq_enable(); |