author    Tony Luck <tony.luck@intel.com>  2005-09-08 15:39:59 -0400
committer Tony Luck <tony.luck@intel.com>  2005-09-08 15:39:59 -0400
commit    d8c97d5f3aa348272df2ccb4e224b1cf9a1eb6d7 (patch)
tree      cb71557f43084027559762f58e51f2df5d5e5c46
parent    4706df3d3c42af802597d82c8b1542c3d52eab23 (diff)
[IA64] simplified efi memory map parsing
New version leaves the original memory map unmodified.  Also saves any
granule trimmings for use by the uncached memory allocator.  Inspired
by Khalid Aziz (various traces of his patch still remain).  Fixes to
uncached_build_memmap() and sn2 testing by Martin Hicks.

Signed-off-by: Tony Luck <tony.luck@intel.com>
-rw-r--r--  arch/ia64/kernel/efi.c       | 423
-rw-r--r--  arch/ia64/kernel/setup.c     |   3
-rw-r--r--  arch/ia64/kernel/uncached.c  |  17
-rw-r--r--  include/asm-ia64/meminit.h   |   4
4 files changed, 254 insertions(+), 193 deletions(-)
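For orientation (not part of the patch): both walkers take a callback with the
efi_freemem_callback_t signature, int (*)(unsigned long start, unsigned long end,
void *arg), and the new walk() helper introduced below invokes it once per
page-aligned virtual range of the requested memory attribute, stopping early if
the callback returns a negative value.  A minimal illustrative caller might look
like the following sketch (count_pages() is hypothetical, not part of this change):

/* hypothetical callback, for illustration only */
static int count_pages(unsigned long start, unsigned long end, void *arg)
{
	unsigned long *total = arg;	/* accumulator supplied by the caller */

	*total += (end - start) >> PAGE_SHIFT;
	return 0;			/* a negative return stops the walk */
}

	/* e.g. somewhere in arch setup code: */
	unsigned long n = 0;
	efi_memmap_walk(count_pages, &n);	/* cacheable (EFI_MEMORY_WB) ranges */
	efi_memmap_walk_uc(count_pages, &n);	/* uncached (EFI_MEMORY_UC) ranges */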
diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c
index 179f230816ed..1291db581721 100644
--- a/arch/ia64/kernel/efi.c
+++ b/arch/ia64/kernel/efi.c
@@ -239,57 +239,30 @@ is_available_memory (efi_memory_desc_t *md)
 	return 0;
 }
 
-/*
- * Trim descriptor MD so its starts at address START_ADDR. If the descriptor covers
- * memory that is normally available to the kernel, issue a warning that some memory
- * is being ignored.
- */
-static void
-trim_bottom (efi_memory_desc_t *md, u64 start_addr)
-{
-	u64 num_skipped_pages;
-
-	if (md->phys_addr >= start_addr || !md->num_pages)
-		return;
-
-	num_skipped_pages = (start_addr - md->phys_addr) >> EFI_PAGE_SHIFT;
-	if (num_skipped_pages > md->num_pages)
-		num_skipped_pages = md->num_pages;
+typedef struct kern_memdesc {
+	u64 attribute;
+	u64 start;
+	u64 num_pages;
+} kern_memdesc_t;
 
-	if (is_available_memory(md))
-		printk(KERN_NOTICE "efi.%s: ignoring %luKB of memory at 0x%lx due to granule hole "
-		       "at 0x%lx\n", __FUNCTION__,
-		       (num_skipped_pages << EFI_PAGE_SHIFT) >> 10,
-		       md->phys_addr, start_addr - IA64_GRANULE_SIZE);
-	/*
-	 * NOTE: Don't set md->phys_addr to START_ADDR because that could cause the memory
-	 * descriptor list to become unsorted. In such a case, md->num_pages will be
-	 * zero, so the Right Thing will happen.
-	 */
-	md->phys_addr += num_skipped_pages << EFI_PAGE_SHIFT;
-	md->num_pages -= num_skipped_pages;
-}
+static kern_memdesc_t *kern_memmap;
 
 static void
-trim_top (efi_memory_desc_t *md, u64 end_addr)
+walk (efi_freemem_callback_t callback, void *arg, u64 attr)
 {
-	u64 num_dropped_pages, md_end_addr;
+	kern_memdesc_t *k;
+	u64 start, end, voff;
 
-	md_end_addr = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT);
-
-	if (md_end_addr <= end_addr || !md->num_pages)
-		return;
-
-	num_dropped_pages = (md_end_addr - end_addr) >> EFI_PAGE_SHIFT;
-	if (num_dropped_pages > md->num_pages)
-		num_dropped_pages = md->num_pages;
-
-	if (is_available_memory(md))
-		printk(KERN_NOTICE "efi.%s: ignoring %luKB of memory at 0x%lx due to granule hole "
-		       "at 0x%lx\n", __FUNCTION__,
-		       (num_dropped_pages << EFI_PAGE_SHIFT) >> 10,
-		       md->phys_addr, end_addr);
-	md->num_pages -= num_dropped_pages;
+	voff = (attr == EFI_MEMORY_WB) ? PAGE_OFFSET : __IA64_UNCACHED_OFFSET;
+	for (k = kern_memmap; k->start != ~0UL; k++) {
+		if (k->attribute != attr)
+			continue;
+		start = PAGE_ALIGN(k->start);
+		end = (k->start + (k->num_pages << EFI_PAGE_SHIFT)) & PAGE_MASK;
+		if (start < end)
+			if ((*callback)(start + voff, end + voff, arg) < 0)
+				return;
+	}
 }
 
 /*
@@ -299,148 +272,19 @@ trim_top (efi_memory_desc_t *md, u64 end_addr)
 void
 efi_memmap_walk (efi_freemem_callback_t callback, void *arg)
 {
-	int prev_valid = 0;
-	struct range {
-		u64 start;
-		u64 end;
-	} prev, curr;
-	void *efi_map_start, *efi_map_end, *p, *q;
-	efi_memory_desc_t *md, *check_md;
-	u64 efi_desc_size, start, end, granule_addr, last_granule_addr, first_non_wb_addr = 0;
-	unsigned long total_mem = 0;
-
-	efi_map_start = __va(ia64_boot_param->efi_memmap);
-	efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size;
-	efi_desc_size = ia64_boot_param->efi_memdesc_size;
-
-	for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
-		md = p;
-
-		/* skip over non-WB memory descriptors; that's all we're interested in... */
-		if (!(md->attribute & EFI_MEMORY_WB))
-			continue;
-
-		/*
-		 * granule_addr is the base of md's first granule.
-		 * [granule_addr - first_non_wb_addr) is guaranteed to
-		 * be contiguous WB memory.
-		 */
-		granule_addr = GRANULEROUNDDOWN(md->phys_addr);
-		first_non_wb_addr = max(first_non_wb_addr, granule_addr);
-
-		if (first_non_wb_addr < md->phys_addr) {
-			trim_bottom(md, granule_addr + IA64_GRANULE_SIZE);
-			granule_addr = GRANULEROUNDDOWN(md->phys_addr);
-			first_non_wb_addr = max(first_non_wb_addr, granule_addr);
-		}
-
-		for (q = p; q < efi_map_end; q += efi_desc_size) {
-			check_md = q;
-
-			if ((check_md->attribute & EFI_MEMORY_WB) &&
-			    (check_md->phys_addr == first_non_wb_addr))
-				first_non_wb_addr += check_md->num_pages << EFI_PAGE_SHIFT;
-			else
-				break;		/* non-WB or hole */
-		}
-
-		last_granule_addr = GRANULEROUNDDOWN(first_non_wb_addr);
-		if (last_granule_addr < md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT))
-			trim_top(md, last_granule_addr);
-
-		if (is_available_memory(md)) {
-			if (md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) >= max_addr) {
-				if (md->phys_addr >= max_addr)
-					continue;
-				md->num_pages = (max_addr - md->phys_addr) >> EFI_PAGE_SHIFT;
-				first_non_wb_addr = max_addr;
-			}
-
-			if (total_mem >= mem_limit)
-				continue;
-
-			if (total_mem + (md->num_pages << EFI_PAGE_SHIFT) > mem_limit) {
-				unsigned long limit_addr = md->phys_addr;
-
-				limit_addr += mem_limit - total_mem;
-				limit_addr = GRANULEROUNDDOWN(limit_addr);
-
-				if (md->phys_addr > limit_addr)
-					continue;
-
-				md->num_pages = (limit_addr - md->phys_addr) >>
-						EFI_PAGE_SHIFT;
-				first_non_wb_addr = max_addr = md->phys_addr +
-					      (md->num_pages << EFI_PAGE_SHIFT);
-			}
-			total_mem += (md->num_pages << EFI_PAGE_SHIFT);
-
-			if (md->num_pages == 0)
-				continue;
-
-			curr.start = PAGE_OFFSET + md->phys_addr;
-			curr.end = curr.start + (md->num_pages << EFI_PAGE_SHIFT);
-
-			if (!prev_valid) {
-				prev = curr;
-				prev_valid = 1;
-			} else {
-				if (curr.start < prev.start)
-					printk(KERN_ERR "Oops: EFI memory table not ordered!\n");
-
-				if (prev.end == curr.start) {
-					/* merge two consecutive memory ranges */
-					prev.end = curr.end;
-				} else {
-					start = PAGE_ALIGN(prev.start);
-					end = prev.end & PAGE_MASK;
-					if ((end > start) && (*callback)(start, end, arg) < 0)
-						return;
-					prev = curr;
-				}
-			}
-		}
-	}
-	if (prev_valid) {
-		start = PAGE_ALIGN(prev.start);
-		end = prev.end & PAGE_MASK;
-		if (end > start)
-			(*callback)(start, end, arg);
-	}
+	walk(callback, arg, EFI_MEMORY_WB);
 }
 
 /*
- * Walk the EFI memory map to pull out leftover pages in the lower
- * memory regions which do not end up in the regular memory map and
- * stick them into the uncached allocator
- *
- * The regular walk function is significantly more complex than the
- * uncached walk which means it really doesn't make sense to try and
- * marge the two.
+ * Walks the EFI memory map and calls CALLBACK once for each EFI memory descriptor that
+ * has memory that is available for uncached allocator.
  */
-void __init
-efi_memmap_walk_uc (efi_freemem_callback_t callback)
+void
+efi_memmap_walk_uc (efi_freemem_callback_t callback, void *arg)
 {
-	void *efi_map_start, *efi_map_end, *p;
-	efi_memory_desc_t *md;
-	u64 efi_desc_size, start, end;
-
-	efi_map_start = __va(ia64_boot_param->efi_memmap);
-	efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size;
-	efi_desc_size = ia64_boot_param->efi_memdesc_size;
-
-	for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
-		md = p;
-		if (md->attribute == EFI_MEMORY_UC) {
-			start = PAGE_ALIGN(md->phys_addr);
-			end = PAGE_ALIGN((md->phys_addr+(md->num_pages << EFI_PAGE_SHIFT)) & PAGE_MASK);
-			if ((*callback)(start, end, NULL) < 0)
-				return;
-		}
-	}
+	walk(callback, arg, EFI_MEMORY_UC);
 }
 
-
 /*
  * Look for the PAL_CODE region reported by EFI and maps it using an
  * ITR to enable safe PAL calls in virtual mode. See IA-64 Processor
@@ -862,3 +706,220 @@ efi_uart_console_only(void)
 	printk(KERN_ERR "Malformed %s value\n", name);
 	return 0;
 }
+
+#define efi_md_size(md)	(md->num_pages << EFI_PAGE_SHIFT)
+
+static inline u64
+kmd_end(kern_memdesc_t *kmd)
+{
+	return (kmd->start + (kmd->num_pages << EFI_PAGE_SHIFT));
+}
+
+static inline u64
+efi_md_end(efi_memory_desc_t *md)
+{
+	return (md->phys_addr + efi_md_size(md));
+}
+
+static inline int
+efi_wb(efi_memory_desc_t *md)
+{
+	return (md->attribute & EFI_MEMORY_WB);
+}
+
+static inline int
+efi_uc(efi_memory_desc_t *md)
+{
+	return (md->attribute & EFI_MEMORY_UC);
+}
+
+/*
+ * Look for the first granule aligned memory descriptor memory
+ * that is big enough to hold EFI memory map. Make sure this
+ * descriptor is atleast granule sized so it does not get trimmed
+ */
+struct kern_memdesc *
+find_memmap_space (void)
+{
+	u64	contig_low=0, contig_high=0;
+	u64	as = 0, ae;
+	void *efi_map_start, *efi_map_end, *p, *q;
+	efi_memory_desc_t *md, *pmd = NULL, *check_md;
+	u64	space_needed, efi_desc_size;
+	unsigned long total_mem = 0;
+
+	efi_map_start = __va(ia64_boot_param->efi_memmap);
+	efi_map_end   = efi_map_start + ia64_boot_param->efi_memmap_size;
+	efi_desc_size = ia64_boot_param->efi_memdesc_size;
+
+	/*
+	 * Worst case: we need 3 kernel descriptors for each efi descriptor
+	 * (if every entry has a WB part in the middle, and UC head and tail),
+	 * plus one for the end marker.
+	 */
+	space_needed = sizeof(kern_memdesc_t) *
+		(3 * (ia64_boot_param->efi_memmap_size/efi_desc_size) + 1);
+
+	for (p = efi_map_start; p < efi_map_end; pmd = md, p += efi_desc_size) {
+		md = p;
+		if (!efi_wb(md)) {
+			continue;
+		}
+		if (pmd == NULL || !efi_wb(pmd) || efi_md_end(pmd) != md->phys_addr) {
+			contig_low = GRANULEROUNDUP(md->phys_addr);
+			contig_high = efi_md_end(md);
+			for (q = p + efi_desc_size; q < efi_map_end; q += efi_desc_size) {
+				check_md = q;
+				if (!efi_wb(check_md))
+					break;
+				if (contig_high != check_md->phys_addr)
+					break;
+				contig_high = efi_md_end(check_md);
+			}
+			contig_high = GRANULEROUNDDOWN(contig_high);
+		}
+		if (!is_available_memory(md) || md->type == EFI_LOADER_DATA)
+			continue;
+
+		/* Round ends inward to granule boundaries */
+		as = max(contig_low, md->phys_addr);
+		ae = min(contig_high, efi_md_end(md));
+
+		/* keep within max_addr= command line arg */
+		ae = min(ae, max_addr);
+		if (ae <= as)
+			continue;
+
+		/* avoid going over mem= command line arg */
+		if (total_mem + (ae - as) > mem_limit)
+			ae -= total_mem + (ae - as) - mem_limit;
+
+		if (ae <= as)
+			continue;
+
+		if (ae - as > space_needed)
+			break;
+	}
+	if (p >= efi_map_end)
+		panic("Can't allocate space for kernel memory descriptors");
+
+	return __va(as);
+}
+
+/*
+ * Walk the EFI memory map and gather all memory available for kernel
+ * to use.  We can allocate partial granules only if the unavailable
+ * parts exist, and are WB.
+ */
+void
+efi_memmap_init(unsigned long *s, unsigned long *e)
+{
+	struct kern_memdesc *k, *prev = 0;
+	u64	contig_low=0, contig_high=0;
+	u64	as, ae, lim;
+	void *efi_map_start, *efi_map_end, *p, *q;
+	efi_memory_desc_t *md, *pmd = NULL, *check_md;
+	u64	efi_desc_size;
+	unsigned long total_mem = 0;
+
+	k = kern_memmap = find_memmap_space();
+
+	efi_map_start = __va(ia64_boot_param->efi_memmap);
+	efi_map_end   = efi_map_start + ia64_boot_param->efi_memmap_size;
+	efi_desc_size = ia64_boot_param->efi_memdesc_size;
+
+	for (p = efi_map_start; p < efi_map_end; pmd = md, p += efi_desc_size) {
+		md = p;
+		if (!efi_wb(md)) {
+			if (efi_uc(md) && (md->type == EFI_CONVENTIONAL_MEMORY ||
+				    md->type == EFI_BOOT_SERVICES_DATA)) {
+				k->attribute = EFI_MEMORY_UC;
+				k->start = md->phys_addr;
+				k->num_pages = md->num_pages;
+				k++;
+			}
+			continue;
+		}
+		if (pmd == NULL || !efi_wb(pmd) || efi_md_end(pmd) != md->phys_addr) {
+			contig_low = GRANULEROUNDUP(md->phys_addr);
+			contig_high = efi_md_end(md);
+			for (q = p + efi_desc_size; q < efi_map_end; q += efi_desc_size) {
+				check_md = q;
+				if (!efi_wb(check_md))
+					break;
+				if (contig_high != check_md->phys_addr)
+					break;
+				contig_high = efi_md_end(check_md);
+			}
+			contig_high = GRANULEROUNDDOWN(contig_high);
+		}
+		if (!is_available_memory(md))
+			continue;
+
+		/*
+		 * Round ends inward to granule boundaries
+		 * Give trimmings to uncached allocator
+		 */
+		if (md->phys_addr < contig_low) {
+			lim = min(efi_md_end(md), contig_low);
+			if (efi_uc(md)) {
+				if (k > kern_memmap && (k-1)->attribute == EFI_MEMORY_UC &&
+				    kmd_end(k-1) == md->phys_addr) {
+					(k-1)->num_pages += (lim - md->phys_addr) >> EFI_PAGE_SHIFT;
+				} else {
+					k->attribute = EFI_MEMORY_UC;
+					k->start = md->phys_addr;
+					k->num_pages = (lim - md->phys_addr) >> EFI_PAGE_SHIFT;
+					k++;
+				}
+			}
+			as = contig_low;
+		} else
+			as = md->phys_addr;
+
+		if (efi_md_end(md) > contig_high) {
+			lim = max(md->phys_addr, contig_high);
+			if (efi_uc(md)) {
+				if (lim == md->phys_addr && k > kern_memmap &&
+				    (k-1)->attribute == EFI_MEMORY_UC &&
+				    kmd_end(k-1) == md->phys_addr) {
+					(k-1)->num_pages += md->num_pages;
+				} else {
+					k->attribute = EFI_MEMORY_UC;
+					k->start = lim;
+					k->num_pages = (efi_md_end(md) - lim) >> EFI_PAGE_SHIFT;
+					k++;
+				}
+			}
+			ae = contig_high;
+		} else
+			ae = efi_md_end(md);
+
+		/* keep within max_addr= command line arg */
+		ae = min(ae, max_addr);
+		if (ae <= as)
+			continue;
+
+		/* avoid going over mem= command line arg */
+		if (total_mem + (ae - as) > mem_limit)
+			ae -= total_mem + (ae - as) - mem_limit;
+
+		if (ae <= as)
+			continue;
+		if (prev && kmd_end(prev) == md->phys_addr) {
+			prev->num_pages += (ae - as) >> EFI_PAGE_SHIFT;
+			total_mem += ae - as;
+			continue;
+		}
+		k->attribute = EFI_MEMORY_WB;
+		k->start = as;
+		k->num_pages = (ae - as) >> EFI_PAGE_SHIFT;
+		total_mem += ae - as;
+		prev = k++;
+	}
+	k->start = ~0L; /* end-marker */
+
+	/* reserve the memory we are using for kern_memmap */
+	*s = (u64)kern_memmap;
+	*e = (u64)++k;
+}
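(For scale, the sizing comment in find_memmap_space() above works out to a small
allocation: kern_memdesc_t is three u64 fields, i.e. 24 bytes, so a hypothetical
EFI map of 128 descriptors needs space_needed = 24 * (3*128 + 1) = 9240 bytes of
granule-aligned WB memory for the kernel descriptors plus the end marker.)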
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index 84f89da7c640..1658d687b79c 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -211,6 +211,9 @@ reserve_memory (void)
 	}
 #endif
 
+	efi_memmap_init(&rsvd_region[n].start, &rsvd_region[n].end);
+	n++;
+
 	/* end of memory marker */
 	rsvd_region[n].start = ~0UL;
 	rsvd_region[n].end = ~0UL;
diff --git a/arch/ia64/kernel/uncached.c b/arch/ia64/kernel/uncached.c
index 4e9d06c48a8b..c6d40446c2c4 100644
--- a/arch/ia64/kernel/uncached.c
+++ b/arch/ia64/kernel/uncached.c
@@ -205,23 +205,18 @@ EXPORT_SYMBOL(uncached_free_page);
 static int __init
 uncached_build_memmap(unsigned long start, unsigned long end, void *arg)
 {
-	long length;
-	unsigned long vstart, vend;
+	long length = end - start;
 	int node;
 
-	length = end - start;
-	vstart = start + __IA64_UNCACHED_OFFSET;
-	vend = end + __IA64_UNCACHED_OFFSET;
-
 	dprintk(KERN_ERR "uncached_build_memmap(%lx %lx)\n", start, end);
 
-	memset((char *)vstart, 0, length);
+	memset((char *)start, 0, length);
 
-	node = paddr_to_nid(start);
+	node = paddr_to_nid(start - __IA64_UNCACHED_OFFSET);
 
-	for (; vstart < vend ; vstart += PAGE_SIZE) {
-		dprintk(KERN_INFO "sticking %lx into the pool!\n", vstart);
-		gen_pool_free(uncached_pool[node], vstart, PAGE_SIZE);
+	for (; start < end ; start += PAGE_SIZE) {
+		dprintk(KERN_INFO "sticking %lx into the pool!\n", start);
+		gen_pool_free(uncached_pool[node], start, PAGE_SIZE);
 	}
 
 	return 0;
diff --git a/include/asm-ia64/meminit.h b/include/asm-ia64/meminit.h
index 1590dc65b30b..90646632237c 100644
--- a/include/asm-ia64/meminit.h
+++ b/include/asm-ia64/meminit.h
@@ -16,10 +16,11 @@
  * - initrd (optional)
  * - command line string
  * - kernel code & data
+ * - Kernel memory map built from EFI memory map
  *
  * More could be added if necessary
  */
-#define IA64_MAX_RSVD_REGIONS 5
+#define IA64_MAX_RSVD_REGIONS 6
 
 struct rsvd_region {
 	unsigned long start;	/* virtual address of beginning of element */
@@ -33,6 +34,7 @@ extern void find_memory (void);
 extern void reserve_memory (void);
 extern void find_initrd (void);
 extern int filter_rsvd_memory (unsigned long start, unsigned long end, void *arg);
+extern void efi_memmap_init(unsigned long *, unsigned long *);
 
 /*
  * For rounding an address to the next IA64_GRANULE_SIZE or order