diff options
author | Tony Luck <tony.luck@intel.com> | 2005-09-08 15:39:59 -0400 |
---|---|---|
committer | Tony Luck <tony.luck@intel.com> | 2005-09-08 15:39:59 -0400 |
commit | d8c97d5f3aa348272df2ccb4e224b1cf9a1eb6d7 (patch) | |
tree | cb71557f43084027559762f58e51f2df5d5e5c46 /arch/ia64/kernel/efi.c | |
parent | 4706df3d3c42af802597d82c8b1542c3d52eab23 (diff) |
[IA64] simplified efi memory map parsing
New version leaves the original memory map unmodified.
Also saves any granule trimmings for use by the uncached
memory allocator.
Inspired by Khalid Aziz (various traces of his patch still
remain). Fixes to uncached_build_memmap() and sn2 testing
by Martin Hicks.
Signed-off-by: Tony Luck <tony.luck@intel.com>
Diffstat (limited to 'arch/ia64/kernel/efi.c')
-rw-r--r-- | arch/ia64/kernel/efi.c | 423 |
1 files changed, 242 insertions, 181 deletions
diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c index 179f230816ed..1291db581721 100644 --- a/arch/ia64/kernel/efi.c +++ b/arch/ia64/kernel/efi.c | |||
@@ -239,57 +239,30 @@ is_available_memory (efi_memory_desc_t *md) | |||
239 | return 0; | 239 | return 0; |
240 | } | 240 | } |
241 | 241 | ||
242 | /* | 242 | typedef struct kern_memdesc { |
243 | * Trim descriptor MD so its starts at address START_ADDR. If the descriptor covers | 243 | u64 attribute; |
244 | * memory that is normally available to the kernel, issue a warning that some memory | 244 | u64 start; |
245 | * is being ignored. | 245 | u64 num_pages; |
246 | */ | 246 | } kern_memdesc_t; |
247 | static void | ||
248 | trim_bottom (efi_memory_desc_t *md, u64 start_addr) | ||
249 | { | ||
250 | u64 num_skipped_pages; | ||
251 | |||
252 | if (md->phys_addr >= start_addr || !md->num_pages) | ||
253 | return; | ||
254 | |||
255 | num_skipped_pages = (start_addr - md->phys_addr) >> EFI_PAGE_SHIFT; | ||
256 | if (num_skipped_pages > md->num_pages) | ||
257 | num_skipped_pages = md->num_pages; | ||
258 | 247 | ||
259 | if (is_available_memory(md)) | 248 | static kern_memdesc_t *kern_memmap; |
260 | printk(KERN_NOTICE "efi.%s: ignoring %luKB of memory at 0x%lx due to granule hole " | ||
261 | "at 0x%lx\n", __FUNCTION__, | ||
262 | (num_skipped_pages << EFI_PAGE_SHIFT) >> 10, | ||
263 | md->phys_addr, start_addr - IA64_GRANULE_SIZE); | ||
264 | /* | ||
265 | * NOTE: Don't set md->phys_addr to START_ADDR because that could cause the memory | ||
266 | * descriptor list to become unsorted. In such a case, md->num_pages will be | ||
267 | * zero, so the Right Thing will happen. | ||
268 | */ | ||
269 | md->phys_addr += num_skipped_pages << EFI_PAGE_SHIFT; | ||
270 | md->num_pages -= num_skipped_pages; | ||
271 | } | ||
272 | 249 | ||
273 | static void | 250 | static void |
274 | trim_top (efi_memory_desc_t *md, u64 end_addr) | 251 | walk (efi_freemem_callback_t callback, void *arg, u64 attr) |
275 | { | 252 | { |
276 | u64 num_dropped_pages, md_end_addr; | 253 | kern_memdesc_t *k; |
254 | u64 start, end, voff; | ||
277 | 255 | ||
278 | md_end_addr = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT); | 256 | voff = (attr == EFI_MEMORY_WB) ? PAGE_OFFSET : __IA64_UNCACHED_OFFSET; |
279 | 257 | for (k = kern_memmap; k->start != ~0UL; k++) { | |
280 | if (md_end_addr <= end_addr || !md->num_pages) | 258 | if (k->attribute != attr) |
281 | return; | 259 | continue; |
282 | 260 | start = PAGE_ALIGN(k->start); | |
283 | num_dropped_pages = (md_end_addr - end_addr) >> EFI_PAGE_SHIFT; | 261 | end = (k->start + (k->num_pages << EFI_PAGE_SHIFT)) & PAGE_MASK; |
284 | if (num_dropped_pages > md->num_pages) | 262 | if (start < end) |
285 | num_dropped_pages = md->num_pages; | 263 | if ((*callback)(start + voff, end + voff, arg) < 0) |
286 | 264 | return; | |
287 | if (is_available_memory(md)) | 265 | } |
288 | printk(KERN_NOTICE "efi.%s: ignoring %luKB of memory at 0x%lx due to granule hole " | ||
289 | "at 0x%lx\n", __FUNCTION__, | ||
290 | (num_dropped_pages << EFI_PAGE_SHIFT) >> 10, | ||
291 | md->phys_addr, end_addr); | ||
292 | md->num_pages -= num_dropped_pages; | ||
293 | } | 266 | } |
294 | 267 | ||
295 | /* | 268 | /* |
@@ -299,148 +272,19 @@ trim_top (efi_memory_desc_t *md, u64 end_addr) | |||
299 | void | 272 | void |
300 | efi_memmap_walk (efi_freemem_callback_t callback, void *arg) | 273 | efi_memmap_walk (efi_freemem_callback_t callback, void *arg) |
301 | { | 274 | { |
302 | int prev_valid = 0; | 275 | walk(callback, arg, EFI_MEMORY_WB); |
303 | struct range { | ||
304 | u64 start; | ||
305 | u64 end; | ||
306 | } prev, curr; | ||
307 | void *efi_map_start, *efi_map_end, *p, *q; | ||
308 | efi_memory_desc_t *md, *check_md; | ||
309 | u64 efi_desc_size, start, end, granule_addr, last_granule_addr, first_non_wb_addr = 0; | ||
310 | unsigned long total_mem = 0; | ||
311 | |||
312 | efi_map_start = __va(ia64_boot_param->efi_memmap); | ||
313 | efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size; | ||
314 | efi_desc_size = ia64_boot_param->efi_memdesc_size; | ||
315 | |||
316 | for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) { | ||
317 | md = p; | ||
318 | |||
319 | /* skip over non-WB memory descriptors; that's all we're interested in... */ | ||
320 | if (!(md->attribute & EFI_MEMORY_WB)) | ||
321 | continue; | ||
322 | |||
323 | /* | ||
324 | * granule_addr is the base of md's first granule. | ||
325 | * [granule_addr - first_non_wb_addr) is guaranteed to | ||
326 | * be contiguous WB memory. | ||
327 | */ | ||
328 | granule_addr = GRANULEROUNDDOWN(md->phys_addr); | ||
329 | first_non_wb_addr = max(first_non_wb_addr, granule_addr); | ||
330 | |||
331 | if (first_non_wb_addr < md->phys_addr) { | ||
332 | trim_bottom(md, granule_addr + IA64_GRANULE_SIZE); | ||
333 | granule_addr = GRANULEROUNDDOWN(md->phys_addr); | ||
334 | first_non_wb_addr = max(first_non_wb_addr, granule_addr); | ||
335 | } | ||
336 | |||
337 | for (q = p; q < efi_map_end; q += efi_desc_size) { | ||
338 | check_md = q; | ||
339 | |||
340 | if ((check_md->attribute & EFI_MEMORY_WB) && | ||
341 | (check_md->phys_addr == first_non_wb_addr)) | ||
342 | first_non_wb_addr += check_md->num_pages << EFI_PAGE_SHIFT; | ||
343 | else | ||
344 | break; /* non-WB or hole */ | ||
345 | } | ||
346 | |||
347 | last_granule_addr = GRANULEROUNDDOWN(first_non_wb_addr); | ||
348 | if (last_granule_addr < md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT)) | ||
349 | trim_top(md, last_granule_addr); | ||
350 | |||
351 | if (is_available_memory(md)) { | ||
352 | if (md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) >= max_addr) { | ||
353 | if (md->phys_addr >= max_addr) | ||
354 | continue; | ||
355 | md->num_pages = (max_addr - md->phys_addr) >> EFI_PAGE_SHIFT; | ||
356 | first_non_wb_addr = max_addr; | ||
357 | } | ||
358 | |||
359 | if (total_mem >= mem_limit) | ||
360 | continue; | ||
361 | |||
362 | if (total_mem + (md->num_pages << EFI_PAGE_SHIFT) > mem_limit) { | ||
363 | unsigned long limit_addr = md->phys_addr; | ||
364 | |||
365 | limit_addr += mem_limit - total_mem; | ||
366 | limit_addr = GRANULEROUNDDOWN(limit_addr); | ||
367 | |||
368 | if (md->phys_addr > limit_addr) | ||
369 | continue; | ||
370 | |||
371 | md->num_pages = (limit_addr - md->phys_addr) >> | ||
372 | EFI_PAGE_SHIFT; | ||
373 | first_non_wb_addr = max_addr = md->phys_addr + | ||
374 | (md->num_pages << EFI_PAGE_SHIFT); | ||
375 | } | ||
376 | total_mem += (md->num_pages << EFI_PAGE_SHIFT); | ||
377 | |||
378 | if (md->num_pages == 0) | ||
379 | continue; | ||
380 | |||
381 | curr.start = PAGE_OFFSET + md->phys_addr; | ||
382 | curr.end = curr.start + (md->num_pages << EFI_PAGE_SHIFT); | ||
383 | |||
384 | if (!prev_valid) { | ||
385 | prev = curr; | ||
386 | prev_valid = 1; | ||
387 | } else { | ||
388 | if (curr.start < prev.start) | ||
389 | printk(KERN_ERR "Oops: EFI memory table not ordered!\n"); | ||
390 | |||
391 | if (prev.end == curr.start) { | ||
392 | /* merge two consecutive memory ranges */ | ||
393 | prev.end = curr.end; | ||
394 | } else { | ||
395 | start = PAGE_ALIGN(prev.start); | ||
396 | end = prev.end & PAGE_MASK; | ||
397 | if ((end > start) && (*callback)(start, end, arg) < 0) | ||
398 | return; | ||
399 | prev = curr; | ||
400 | } | ||
401 | } | ||
402 | } | ||
403 | } | ||
404 | if (prev_valid) { | ||
405 | start = PAGE_ALIGN(prev.start); | ||
406 | end = prev.end & PAGE_MASK; | ||
407 | if (end > start) | ||
408 | (*callback)(start, end, arg); | ||
409 | } | ||
410 | } | 276 | } |
411 | 277 | ||
412 | /* | 278 | /* |
413 | * Walk the EFI memory map to pull out leftover pages in the lower | 279 | * Walks the EFI memory map and calls CALLBACK once for each EFI memory descriptor that |
414 | * memory regions which do not end up in the regular memory map and | 280 | * has memory that is available for uncached allocator. |
415 | * stick them into the uncached allocator | ||
416 | * | ||
417 | * The regular walk function is significantly more complex than the | ||
418 | * uncached walk which means it really doesn't make sense to try and | ||
419 | * marge the two. | ||
420 | */ | 281 | */ |
421 | void __init | 282 | void |
422 | efi_memmap_walk_uc (efi_freemem_callback_t callback) | 283 | efi_memmap_walk_uc (efi_freemem_callback_t callback, void *arg) |
423 | { | 284 | { |
424 | void *efi_map_start, *efi_map_end, *p; | 285 | walk(callback, arg, EFI_MEMORY_UC); |
425 | efi_memory_desc_t *md; | ||
426 | u64 efi_desc_size, start, end; | ||
427 | |||
428 | efi_map_start = __va(ia64_boot_param->efi_memmap); | ||
429 | efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size; | ||
430 | efi_desc_size = ia64_boot_param->efi_memdesc_size; | ||
431 | |||
432 | for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) { | ||
433 | md = p; | ||
434 | if (md->attribute == EFI_MEMORY_UC) { | ||
435 | start = PAGE_ALIGN(md->phys_addr); | ||
436 | end = PAGE_ALIGN((md->phys_addr+(md->num_pages << EFI_PAGE_SHIFT)) & PAGE_MASK); | ||
437 | if ((*callback)(start, end, NULL) < 0) | ||
438 | return; | ||
439 | } | ||
440 | } | ||
441 | } | 286 | } |
442 | 287 | ||
443 | |||
444 | /* | 288 | /* |
445 | * Look for the PAL_CODE region reported by EFI and maps it using an | 289 | * Look for the PAL_CODE region reported by EFI and maps it using an |
446 | * ITR to enable safe PAL calls in virtual mode. See IA-64 Processor | 290 | * ITR to enable safe PAL calls in virtual mode. See IA-64 Processor |
@@ -862,3 +706,220 @@ efi_uart_console_only(void) | |||
862 | printk(KERN_ERR "Malformed %s value\n", name); | 706 | printk(KERN_ERR "Malformed %s value\n", name); |
863 | return 0; | 707 | return 0; |
864 | } | 708 | } |
709 | |||
/*
 * Size in bytes of the range described by EFI memory descriptor MD.
 * The argument is parenthesized so that any pointer expression
 * (e.g. "p + 1") can be passed safely.
 */
#define efi_md_size(md)	((md)->num_pages << EFI_PAGE_SHIFT)
711 | |||
712 | static inline u64 | ||
713 | kmd_end(kern_memdesc_t *kmd) | ||
714 | { | ||
715 | return (kmd->start + (kmd->num_pages << EFI_PAGE_SHIFT)); | ||
716 | } | ||
717 | |||
718 | static inline u64 | ||
719 | efi_md_end(efi_memory_desc_t *md) | ||
720 | { | ||
721 | return (md->phys_addr + efi_md_size(md)); | ||
722 | } | ||
723 | |||
724 | static inline int | ||
725 | efi_wb(efi_memory_desc_t *md) | ||
726 | { | ||
727 | return (md->attribute & EFI_MEMORY_WB); | ||
728 | } | ||
729 | |||
730 | static inline int | ||
731 | efi_uc(efi_memory_desc_t *md) | ||
732 | { | ||
733 | return (md->attribute & EFI_MEMORY_UC); | ||
734 | } | ||
735 | |||
736 | /* | ||
737 | * Look for the first granule aligned memory descriptor memory | ||
738 | * that is big enough to hold EFI memory map. Make sure this | ||
739 | * descriptor is atleast granule sized so it does not get trimmed | ||
740 | */ | ||
741 | struct kern_memdesc * | ||
742 | find_memmap_space (void) | ||
743 | { | ||
744 | u64 contig_low=0, contig_high=0; | ||
745 | u64 as = 0, ae; | ||
746 | void *efi_map_start, *efi_map_end, *p, *q; | ||
747 | efi_memory_desc_t *md, *pmd = NULL, *check_md; | ||
748 | u64 space_needed, efi_desc_size; | ||
749 | unsigned long total_mem = 0; | ||
750 | |||
751 | efi_map_start = __va(ia64_boot_param->efi_memmap); | ||
752 | efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size; | ||
753 | efi_desc_size = ia64_boot_param->efi_memdesc_size; | ||
754 | |||
755 | /* | ||
756 | * Worst case: we need 3 kernel descriptors for each efi descriptor | ||
757 | * (if every entry has a WB part in the middle, and UC head and tail), | ||
758 | * plus one for the end marker. | ||
759 | */ | ||
760 | space_needed = sizeof(kern_memdesc_t) * | ||
761 | (3 * (ia64_boot_param->efi_memmap_size/efi_desc_size) + 1); | ||
762 | |||
763 | for (p = efi_map_start; p < efi_map_end; pmd = md, p += efi_desc_size) { | ||
764 | md = p; | ||
765 | if (!efi_wb(md)) { | ||
766 | continue; | ||
767 | } | ||
768 | if (pmd == NULL || !efi_wb(pmd) || efi_md_end(pmd) != md->phys_addr) { | ||
769 | contig_low = GRANULEROUNDUP(md->phys_addr); | ||
770 | contig_high = efi_md_end(md); | ||
771 | for (q = p + efi_desc_size; q < efi_map_end; q += efi_desc_size) { | ||
772 | check_md = q; | ||
773 | if (!efi_wb(check_md)) | ||
774 | break; | ||
775 | if (contig_high != check_md->phys_addr) | ||
776 | break; | ||
777 | contig_high = efi_md_end(check_md); | ||
778 | } | ||
779 | contig_high = GRANULEROUNDDOWN(contig_high); | ||
780 | } | ||
781 | if (!is_available_memory(md) || md->type == EFI_LOADER_DATA) | ||
782 | continue; | ||
783 | |||
784 | /* Round ends inward to granule boundaries */ | ||
785 | as = max(contig_low, md->phys_addr); | ||
786 | ae = min(contig_high, efi_md_end(md)); | ||
787 | |||
788 | /* keep within max_addr= command line arg */ | ||
789 | ae = min(ae, max_addr); | ||
790 | if (ae <= as) | ||
791 | continue; | ||
792 | |||
793 | /* avoid going over mem= command line arg */ | ||
794 | if (total_mem + (ae - as) > mem_limit) | ||
795 | ae -= total_mem + (ae - as) - mem_limit; | ||
796 | |||
797 | if (ae <= as) | ||
798 | continue; | ||
799 | |||
800 | if (ae - as > space_needed) | ||
801 | break; | ||
802 | } | ||
803 | if (p >= efi_map_end) | ||
804 | panic("Can't allocate space for kernel memory descriptors"); | ||
805 | |||
806 | return __va(as); | ||
807 | } | ||
808 | |||
809 | /* | ||
810 | * Walk the EFI memory map and gather all memory available for kernel | ||
811 | * to use. We can allocate partial granules only if the unavailable | ||
812 | * parts exist, and are WB. | ||
813 | */ | ||
814 | void | ||
815 | efi_memmap_init(unsigned long *s, unsigned long *e) | ||
816 | { | ||
817 | struct kern_memdesc *k, *prev = 0; | ||
818 | u64 contig_low=0, contig_high=0; | ||
819 | u64 as, ae, lim; | ||
820 | void *efi_map_start, *efi_map_end, *p, *q; | ||
821 | efi_memory_desc_t *md, *pmd = NULL, *check_md; | ||
822 | u64 efi_desc_size; | ||
823 | unsigned long total_mem = 0; | ||
824 | |||
825 | k = kern_memmap = find_memmap_space(); | ||
826 | |||
827 | efi_map_start = __va(ia64_boot_param->efi_memmap); | ||
828 | efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size; | ||
829 | efi_desc_size = ia64_boot_param->efi_memdesc_size; | ||
830 | |||
831 | for (p = efi_map_start; p < efi_map_end; pmd = md, p += efi_desc_size) { | ||
832 | md = p; | ||
833 | if (!efi_wb(md)) { | ||
834 | if (efi_uc(md) && (md->type == EFI_CONVENTIONAL_MEMORY || | ||
835 | md->type == EFI_BOOT_SERVICES_DATA)) { | ||
836 | k->attribute = EFI_MEMORY_UC; | ||
837 | k->start = md->phys_addr; | ||
838 | k->num_pages = md->num_pages; | ||
839 | k++; | ||
840 | } | ||
841 | continue; | ||
842 | } | ||
843 | if (pmd == NULL || !efi_wb(pmd) || efi_md_end(pmd) != md->phys_addr) { | ||
844 | contig_low = GRANULEROUNDUP(md->phys_addr); | ||
845 | contig_high = efi_md_end(md); | ||
846 | for (q = p + efi_desc_size; q < efi_map_end; q += efi_desc_size) { | ||
847 | check_md = q; | ||
848 | if (!efi_wb(check_md)) | ||
849 | break; | ||
850 | if (contig_high != check_md->phys_addr) | ||
851 | break; | ||
852 | contig_high = efi_md_end(check_md); | ||
853 | } | ||
854 | contig_high = GRANULEROUNDDOWN(contig_high); | ||
855 | } | ||
856 | if (!is_available_memory(md)) | ||
857 | continue; | ||
858 | |||
859 | /* | ||
860 | * Round ends inward to granule boundaries | ||
861 | * Give trimmings to uncached allocator | ||
862 | */ | ||
863 | if (md->phys_addr < contig_low) { | ||
864 | lim = min(efi_md_end(md), contig_low); | ||
865 | if (efi_uc(md)) { | ||
866 | if (k > kern_memmap && (k-1)->attribute == EFI_MEMORY_UC && | ||
867 | kmd_end(k-1) == md->phys_addr) { | ||
868 | (k-1)->num_pages += (lim - md->phys_addr) >> EFI_PAGE_SHIFT; | ||
869 | } else { | ||
870 | k->attribute = EFI_MEMORY_UC; | ||
871 | k->start = md->phys_addr; | ||
872 | k->num_pages = (lim - md->phys_addr) >> EFI_PAGE_SHIFT; | ||
873 | k++; | ||
874 | } | ||
875 | } | ||
876 | as = contig_low; | ||
877 | } else | ||
878 | as = md->phys_addr; | ||
879 | |||
880 | if (efi_md_end(md) > contig_high) { | ||
881 | lim = max(md->phys_addr, contig_high); | ||
882 | if (efi_uc(md)) { | ||
883 | if (lim == md->phys_addr && k > kern_memmap && | ||
884 | (k-1)->attribute == EFI_MEMORY_UC && | ||
885 | kmd_end(k-1) == md->phys_addr) { | ||
886 | (k-1)->num_pages += md->num_pages; | ||
887 | } else { | ||
888 | k->attribute = EFI_MEMORY_UC; | ||
889 | k->start = lim; | ||
890 | k->num_pages = (efi_md_end(md) - lim) >> EFI_PAGE_SHIFT; | ||
891 | k++; | ||
892 | } | ||
893 | } | ||
894 | ae = contig_high; | ||
895 | } else | ||
896 | ae = efi_md_end(md); | ||
897 | |||
898 | /* keep within max_addr= command line arg */ | ||
899 | ae = min(ae, max_addr); | ||
900 | if (ae <= as) | ||
901 | continue; | ||
902 | |||
903 | /* avoid going over mem= command line arg */ | ||
904 | if (total_mem + (ae - as) > mem_limit) | ||
905 | ae -= total_mem + (ae - as) - mem_limit; | ||
906 | |||
907 | if (ae <= as) | ||
908 | continue; | ||
909 | if (prev && kmd_end(prev) == md->phys_addr) { | ||
910 | prev->num_pages += (ae - as) >> EFI_PAGE_SHIFT; | ||
911 | total_mem += ae - as; | ||
912 | continue; | ||
913 | } | ||
914 | k->attribute = EFI_MEMORY_WB; | ||
915 | k->start = as; | ||
916 | k->num_pages = (ae - as) >> EFI_PAGE_SHIFT; | ||
917 | total_mem += ae - as; | ||
918 | prev = k++; | ||
919 | } | ||
920 | k->start = ~0L; /* end-marker */ | ||
921 | |||
922 | /* reserve the memory we are using for kern_memmap */ | ||
923 | *s = (u64)kern_memmap; | ||
924 | *e = (u64)++k; | ||
925 | } | ||