author		Tony Luck <tony.luck@intel.com>	2015-06-24 19:58:09 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2015-06-24 20:49:44 -0400
commit		fc6daaf93151877748f8096af6b3fddb147f22d6 (patch)
tree		1892f34cca08d40af6598bccae87c42037c5ea80 /include/linux/memblock.h
parent		6afdb859b71019143b8eecda02b8b29b03185055 (diff)
mm/memblock: add extra "flags" to memblock to allow selection of memory based on attribute
Some high end Intel Xeon systems report uncorrectable memory errors as a
recoverable machine check.  Linux has included code for some time to
process these and just signal the affected processes (or even recover
completely if the error was in a read only page that can be replaced by
reading from disk).

But we have no recovery path for errors encountered during kernel code
execution.  Except for some very specific cases we are unlikely to ever
be able to recover.

Enter memory mirroring.  Actually the 3rd generation of memory mirroring:

Gen1: All memory is mirrored
	Pro: No s/w enabling - h/w just gets good data from other side
	     of the mirror
	Con: Halves effective memory capacity available to OS/applications

Gen2: Partial memory mirror - just mirror memory behind some memory
      controllers
	Pro: Keep more of the capacity
	Con: Nightmare to enable.  Have to choose between allocating from
	     mirrored memory for safety vs. NUMA local memory for
	     performance

Gen3: Address range partial memory mirror - some mirror on each memory
      controller
	Pro: Can tune the amount of mirror and keep NUMA performance
	Con: I have to write memory management code to implement

The current plan is just to use mirrored memory for kernel allocations.
This has been broken into two phases:

1) This patch series - find the mirrored memory, use it for boot time
   allocations

2) Wade into mm/page_alloc.c and define a ZONE_MIRROR to pick up the
   unused mirrored memory from mm/memblock.c and only give it out to
   select kernel allocations (this is still being scoped because
   page_alloc.c is scary).

This patch (of 3):

Add extra "flags" to memblock to allow selection of memory based on
attribute.  No functional changes.

Signed-off-by: Tony Luck <tony.luck@intel.com>
Cc: Xishi Qiu <qiuxishi@huawei.com>
Cc: Hanjun Guo <guohanjun@huawei.com>
Cc: Xiexiuqi <xiexiuqi@huawei.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Naoya Horiguchi <nao.horiguchi@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
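For illustration, a minimal sketch of how the flags-aware iterator added
below is used.  The helper function itself is hypothetical;
for_each_free_mem_range(), MEMBLOCK_NONE, NUMA_NO_NODE and the %pa
printk specifier are the real interfaces as of this patch:

#include <linux/init.h>
#include <linux/memblock.h>
#include <linux/numa.h>
#include <linux/printk.h>

/*
 * Hypothetical early-boot helper: walk every free (memory && !reserved)
 * range on any node.  MEMBLOCK_NONE requests no attribute-based
 * filtering, which is the behaviour every existing caller keeps after
 * this patch; a later patch in the series can pass a real attribute
 * flag here instead to restrict the walk (e.g. to mirrored ranges).
 */
static void __init report_free_ranges(void)
{
	phys_addr_t start, end;
	u64 i;

	for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE,
				&start, &end, NULL)
		pr_info("free range: %pa..%pa\n", &start, &end);
}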
Diffstat (limited to 'include/linux/memblock.h')
-rw-r--r--	include/linux/memblock.h	41
1 file changed, 26 insertions(+), 15 deletions(-)
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 9497ec7c77ea..7aeec0cb4c27 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -21,7 +21,10 @@
 #define INIT_PHYSMEM_REGIONS	4
 
 /* Definition of memblock flags. */
-#define MEMBLOCK_HOTPLUG	0x1	/* hotpluggable region */
+enum {
+	MEMBLOCK_NONE		= 0x0,	/* No special request */
+	MEMBLOCK_HOTPLUG	= 0x1,	/* hotpluggable region */
+};
 
 struct memblock_region {
 	phys_addr_t base;
@@ -61,7 +64,7 @@ extern bool movable_node_enabled;
 
 phys_addr_t memblock_find_in_range_node(phys_addr_t size, phys_addr_t align,
 					phys_addr_t start, phys_addr_t end,
-					int nid);
+					int nid, ulong flags);
 phys_addr_t memblock_find_in_range(phys_addr_t start, phys_addr_t end,
 				   phys_addr_t size, phys_addr_t align);
 phys_addr_t get_allocated_memblock_reserved_regions_info(phys_addr_t *addr);
@@ -85,11 +88,13 @@ int memblock_remove_range(struct memblock_type *type,
 			  phys_addr_t base,
 			  phys_addr_t size);
 
-void __next_mem_range(u64 *idx, int nid, struct memblock_type *type_a,
+void __next_mem_range(u64 *idx, int nid, ulong flags,
+		      struct memblock_type *type_a,
 		      struct memblock_type *type_b, phys_addr_t *out_start,
 		      phys_addr_t *out_end, int *out_nid);
 
-void __next_mem_range_rev(u64 *idx, int nid, struct memblock_type *type_a,
+void __next_mem_range_rev(u64 *idx, int nid, ulong flags,
+			  struct memblock_type *type_a,
 			  struct memblock_type *type_b, phys_addr_t *out_start,
 			  phys_addr_t *out_end, int *out_nid);
 
@@ -100,16 +105,17 @@ void __next_mem_range_rev(u64 *idx, int nid, struct memblock_type *type_a,
  * @type_a: ptr to memblock_type to iterate
  * @type_b: ptr to memblock_type which excludes from the iteration
  * @nid: node selector, %NUMA_NO_NODE for all nodes
+ * @flags: pick from blocks based on memory attributes
  * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL
  * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL
  * @p_nid: ptr to int for nid of the range, can be %NULL
  */
-#define for_each_mem_range(i, type_a, type_b, nid,			\
+#define for_each_mem_range(i, type_a, type_b, nid, flags,		\
 			   p_start, p_end, p_nid)			\
-	for (i = 0, __next_mem_range(&i, nid, type_a, type_b,		\
+	for (i = 0, __next_mem_range(&i, nid, flags, type_a, type_b,	\
 				     p_start, p_end, p_nid);		\
 	     i != (u64)ULLONG_MAX;					\
-	     __next_mem_range(&i, nid, type_a, type_b,			\
+	     __next_mem_range(&i, nid, flags, type_a, type_b,		\
 			      p_start, p_end, p_nid))
 
 /**
@@ -119,17 +125,18 @@ void __next_mem_range_rev(u64 *idx, int nid, struct memblock_type *type_a,
  * @type_a: ptr to memblock_type to iterate
  * @type_b: ptr to memblock_type which excludes from the iteration
  * @nid: node selector, %NUMA_NO_NODE for all nodes
+ * @flags: pick from blocks based on memory attributes
  * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL
  * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL
  * @p_nid: ptr to int for nid of the range, can be %NULL
  */
-#define for_each_mem_range_rev(i, type_a, type_b, nid,			\
+#define for_each_mem_range_rev(i, type_a, type_b, nid, flags,		\
 			       p_start, p_end, p_nid)			\
 	for (i = (u64)ULLONG_MAX,					\
-		     __next_mem_range_rev(&i, nid, type_a, type_b,	\
+		     __next_mem_range_rev(&i, nid, flags, type_a, type_b,\
 					  p_start, p_end, p_nid);	\
 	     i != (u64)ULLONG_MAX;					\
-	     __next_mem_range_rev(&i, nid, type_a, type_b,		\
+	     __next_mem_range_rev(&i, nid, flags, type_a, type_b,	\
 				  p_start, p_end, p_nid))
 
 #ifdef CONFIG_MOVABLE_NODE
@@ -181,13 +188,14 @@ void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn,
  * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL
  * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL
  * @p_nid: ptr to int for nid of the range, can be %NULL
+ * @flags: pick from blocks based on memory attributes
  *
  * Walks over free (memory && !reserved) areas of memblock.  Available as
  * soon as memblock is initialized.
  */
-#define for_each_free_mem_range(i, nid, p_start, p_end, p_nid)		\
+#define for_each_free_mem_range(i, nid, flags, p_start, p_end, p_nid)	\
 	for_each_mem_range(i, &memblock.memory, &memblock.reserved,	\
-			   nid, p_start, p_end, p_nid)
+			   nid, flags, p_start, p_end, p_nid)
 
 /**
  * for_each_free_mem_range_reverse - rev-iterate through free memblock areas
@@ -196,13 +204,15 @@ void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn,
  * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL
  * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL
  * @p_nid: ptr to int for nid of the range, can be %NULL
+ * @flags: pick from blocks based on memory attributes
  *
  * Walks over free (memory && !reserved) areas of memblock in reverse
  * order.  Available as soon as memblock is initialized.
  */
-#define for_each_free_mem_range_reverse(i, nid, p_start, p_end, p_nid)	\
+#define for_each_free_mem_range_reverse(i, nid, flags, p_start, p_end,	\
+					p_nid)				\
 	for_each_mem_range_rev(i, &memblock.memory, &memblock.reserved,	\
-			       nid, p_start, p_end, p_nid)
+			       nid, flags, p_start, p_end, p_nid)
 
 static inline void memblock_set_region_flags(struct memblock_region *r,
 					     unsigned long flags)
@@ -273,7 +283,8 @@ static inline bool memblock_bottom_up(void) { return false; }
 #define MEMBLOCK_ALLOC_ACCESSIBLE	0
 
 phys_addr_t __init memblock_alloc_range(phys_addr_t size, phys_addr_t align,
-					phys_addr_t start, phys_addr_t end);
+					phys_addr_t start, phys_addr_t end,
+					ulong flags);
 phys_addr_t memblock_alloc_base(phys_addr_t size, phys_addr_t align,
 				phys_addr_t max_addr);
 phys_addr_t __memblock_alloc_base(phys_addr_t size, phys_addr_t align,
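For the allocation side, a sketch of a caller adapted to the widened
memblock_alloc_range() declared above.  The call site is invented for
illustration, but the signature, SZ_4K, MEMBLOCK_ALLOC_ACCESSIBLE and
MEMBLOCK_NONE all match this header and <linux/sizes.h>:

#include <linux/memblock.h>
#include <linux/sizes.h>

/*
 * Illustrative only: grab one 4 KiB, 4 KiB-aligned chunk of boot memory
 * from anywhere accessible.  Passing MEMBLOCK_NONE as the new fifth
 * argument makes the request attribute-neutral, so it behaves exactly
 * as it did before the flags parameter existed; 0 is still returned on
 * failure.
 */
phys_addr_t addr = memblock_alloc_range(SZ_4K, SZ_4K, 0,
					MEMBLOCK_ALLOC_ACCESSIBLE,
					MEMBLOCK_NONE);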