diff options
author | Bjorn Helgaas <bjorn.helgaas@hp.com> | 2006-05-05 19:19:50 -0400 |
---|---|---|
committer | Tony Luck <tony.luck@intel.com> | 2006-05-08 19:32:05 -0400 |
commit | 32e62c636a728cb39c0b3bd191286f2ca65d4028 (patch) | |
tree | 656454a01e720819103c172daae15b5f2fd85d68 /arch/ia64 | |
parent | 6810b548b25114607e0814612d84125abccc0a4f (diff) |
[IA64] rework memory attribute aliasing
This closes a couple of holes in our attribute aliasing avoidance scheme:
- The current kernel fails mmaps of some /dev/mem MMIO regions because
they don't appear in the EFI memory map. This keeps X from working
on the Intel Tiger box.
- The current kernel allows UC (uncacheable) mmap of the 0-1MB region of
/sys/.../legacy_mem even when the chipset doesn't support UC
access. This causes an MCA (machine check abort) when starting X on
HP rx7620 and rx8620 boxes in the default configuration.
There's more detail in Documentation/ia64/aliasing.txt, which this patch
adds, but the general idea is that if a region might be covered by
a granule-sized kernel identity mapping, any access via /dev/mem or
mmap must use the same attribute as the identity mapping.
Otherwise, we fall back to using an attribute that is supported
according to the EFI memory map, or to using UC if the EFI memory
map doesn't mention the region.
Signed-off-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
Diffstat (limited to 'arch/ia64')
-rw-r--r-- | arch/ia64/kernel/efi.c | 156 | ||||
-rw-r--r-- | arch/ia64/mm/ioremap.c | 27 | ||||
-rw-r--r-- | arch/ia64/pci/pci.c | 17 |
3 files changed, 139 insertions, 61 deletions
diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c index 12cfedce73b1..c33d0ba7e300 100644 --- a/arch/ia64/kernel/efi.c +++ b/arch/ia64/kernel/efi.c | |||
@@ -8,6 +8,8 @@ | |||
8 | * Copyright (C) 1999-2003 Hewlett-Packard Co. | 8 | * Copyright (C) 1999-2003 Hewlett-Packard Co. |
9 | * David Mosberger-Tang <davidm@hpl.hp.com> | 9 | * David Mosberger-Tang <davidm@hpl.hp.com> |
10 | * Stephane Eranian <eranian@hpl.hp.com> | 10 | * Stephane Eranian <eranian@hpl.hp.com> |
11 | * (c) Copyright 2006 Hewlett-Packard Development Company, L.P. | ||
12 | * Bjorn Helgaas <bjorn.helgaas@hp.com> | ||
11 | * | 13 | * |
12 | * All EFI Runtime Services are not implemented yet as EFI only | 14 | * All EFI Runtime Services are not implemented yet as EFI only |
13 | * supports physical mode addressing on SoftSDV. This is to be fixed | 15 | * supports physical mode addressing on SoftSDV. This is to be fixed |
@@ -622,28 +624,20 @@ efi_get_iobase (void) | |||
622 | return 0; | 624 | return 0; |
623 | } | 625 | } |
624 | 626 | ||
625 | static efi_memory_desc_t * | 627 | static struct kern_memdesc * |
626 | efi_memory_descriptor (unsigned long phys_addr) | 628 | kern_memory_descriptor (unsigned long phys_addr) |
627 | { | 629 | { |
628 | void *efi_map_start, *efi_map_end, *p; | 630 | struct kern_memdesc *md; |
629 | efi_memory_desc_t *md; | ||
630 | u64 efi_desc_size; | ||
631 | |||
632 | efi_map_start = __va(ia64_boot_param->efi_memmap); | ||
633 | efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size; | ||
634 | efi_desc_size = ia64_boot_param->efi_memdesc_size; | ||
635 | 631 | ||
636 | for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) { | 632 | for (md = kern_memmap; md->start != ~0UL; md++) { |
637 | md = p; | 633 | if (phys_addr - md->start < (md->num_pages << EFI_PAGE_SHIFT)) |
638 | |||
639 | if (phys_addr - md->phys_addr < (md->num_pages << EFI_PAGE_SHIFT)) | ||
640 | return md; | 634 | return md; |
641 | } | 635 | } |
642 | return 0; | 636 | return 0; |
643 | } | 637 | } |
644 | 638 | ||
645 | static int | 639 | static efi_memory_desc_t * |
646 | efi_memmap_has_mmio (void) | 640 | efi_memory_descriptor (unsigned long phys_addr) |
647 | { | 641 | { |
648 | void *efi_map_start, *efi_map_end, *p; | 642 | void *efi_map_start, *efi_map_end, *p; |
649 | efi_memory_desc_t *md; | 643 | efi_memory_desc_t *md; |
@@ -656,8 +650,8 @@ efi_memmap_has_mmio (void) | |||
656 | for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) { | 650 | for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) { |
657 | md = p; | 651 | md = p; |
658 | 652 | ||
659 | if (md->type == EFI_MEMORY_MAPPED_IO) | 653 | if (phys_addr - md->phys_addr < (md->num_pages << EFI_PAGE_SHIFT)) |
660 | return 1; | 654 | return md; |
661 | } | 655 | } |
662 | return 0; | 656 | return 0; |
663 | } | 657 | } |
@@ -683,71 +677,125 @@ efi_mem_attributes (unsigned long phys_addr) | |||
683 | } | 677 | } |
684 | EXPORT_SYMBOL(efi_mem_attributes); | 678 | EXPORT_SYMBOL(efi_mem_attributes); |
685 | 679 | ||
686 | /* | 680 | u64 |
687 | * Determines whether the memory at phys_addr supports the desired | 681 | efi_mem_attribute (unsigned long phys_addr, unsigned long size) |
688 | * attribute (WB, UC, etc). If this returns 1, the caller can safely | ||
689 | * access size bytes at phys_addr with the specified attribute. | ||
690 | */ | ||
691 | int | ||
692 | efi_mem_attribute_range (unsigned long phys_addr, unsigned long size, u64 attr) | ||
693 | { | 682 | { |
694 | unsigned long end = phys_addr + size; | 683 | unsigned long end = phys_addr + size; |
695 | efi_memory_desc_t *md = efi_memory_descriptor(phys_addr); | 684 | efi_memory_desc_t *md = efi_memory_descriptor(phys_addr); |
685 | u64 attr; | ||
686 | |||
687 | if (!md) | ||
688 | return 0; | ||
689 | |||
690 | /* | ||
691 | * EFI_MEMORY_RUNTIME is not a memory attribute; it just tells | ||
692 | * the kernel that firmware needs this region mapped. | ||
693 | */ | ||
694 | attr = md->attribute & ~EFI_MEMORY_RUNTIME; | ||
695 | do { | ||
696 | unsigned long md_end = efi_md_end(md); | ||
697 | |||
698 | if (end <= md_end) | ||
699 | return attr; | ||
700 | |||
701 | md = efi_memory_descriptor(md_end); | ||
702 | if (!md || (md->attribute & ~EFI_MEMORY_RUNTIME) != attr) | ||
703 | return 0; | ||
704 | } while (md); | ||
705 | return 0; | ||
706 | } | ||
707 | |||
708 | u64 | ||
709 | kern_mem_attribute (unsigned long phys_addr, unsigned long size) | ||
710 | { | ||
711 | unsigned long end = phys_addr + size; | ||
712 | struct kern_memdesc *md; | ||
713 | u64 attr; | ||
696 | 714 | ||
697 | /* | 715 | /* |
698 | * Some firmware doesn't report MMIO regions in the EFI memory | 716 | * This is a hack for ioremap calls before we set up kern_memmap. |
699 | * map. The Intel BigSur (a.k.a. HP i2000) has this problem. | 717 | * Maybe we should do efi_memmap_init() earlier instead. |
700 | * On those platforms, we have to assume UC is valid everywhere. | ||
701 | */ | 718 | */ |
702 | if (!md || (md->attribute & attr) != attr) { | 719 | if (!kern_memmap) { |
703 | if (attr == EFI_MEMORY_UC && !efi_memmap_has_mmio()) | 720 | attr = efi_mem_attribute(phys_addr, size); |
704 | return 1; | 721 | if (attr & EFI_MEMORY_WB) |
722 | return EFI_MEMORY_WB; | ||
705 | return 0; | 723 | return 0; |
706 | } | 724 | } |
707 | 725 | ||
726 | md = kern_memory_descriptor(phys_addr); | ||
727 | if (!md) | ||
728 | return 0; | ||
729 | |||
730 | attr = md->attribute; | ||
708 | do { | 731 | do { |
709 | unsigned long md_end = efi_md_end(md); | 732 | unsigned long md_end = kmd_end(md); |
710 | 733 | ||
711 | if (end <= md_end) | 734 | if (end <= md_end) |
712 | return 1; | 735 | return attr; |
713 | 736 | ||
714 | md = efi_memory_descriptor(md_end); | 737 | md = kern_memory_descriptor(md_end); |
715 | if (!md || (md->attribute & attr) != attr) | 738 | if (!md || md->attribute != attr) |
716 | return 0; | 739 | return 0; |
717 | } while (md); | 740 | } while (md); |
718 | return 0; | 741 | return 0; |
719 | } | 742 | } |
743 | EXPORT_SYMBOL(kern_mem_attribute); | ||
720 | 744 | ||
721 | /* | ||
722 | * For /dev/mem, we only allow read & write system calls to access | ||
723 | * write-back memory, because read & write don't allow the user to | ||
724 | * control access size. | ||
725 | */ | ||
726 | int | 745 | int |
727 | valid_phys_addr_range (unsigned long phys_addr, unsigned long size) | 746 | valid_phys_addr_range (unsigned long phys_addr, unsigned long size) |
728 | { | 747 | { |
729 | return efi_mem_attribute_range(phys_addr, size, EFI_MEMORY_WB); | 748 | u64 attr; |
749 | |||
750 | /* | ||
751 | * /dev/mem reads and writes use copy_to_user(), which implicitly | ||
752 | * uses a granule-sized kernel identity mapping. It's really | ||
753 | * only safe to do this for regions in kern_memmap. For more | ||
754 | * details, see Documentation/ia64/aliasing.txt. | ||
755 | */ | ||
756 | attr = kern_mem_attribute(phys_addr, size); | ||
757 | if (attr & EFI_MEMORY_WB || attr & EFI_MEMORY_UC) | ||
758 | return 1; | ||
759 | return 0; | ||
730 | } | 760 | } |
731 | 761 | ||
732 | /* | ||
733 | * We allow mmap of anything in the EFI memory map that supports | ||
734 | * either write-back or uncacheable access. For uncacheable regions, | ||
735 | * the supported access sizes are system-dependent, and the user is | ||
736 | * responsible for using the correct size. | ||
737 | * | ||
738 | * Note that this doesn't currently allow access to hot-added memory, | ||
739 | * because that doesn't appear in the boot-time EFI memory map. | ||
740 | */ | ||
741 | int | 762 | int |
742 | valid_mmap_phys_addr_range (unsigned long phys_addr, unsigned long size) | 763 | valid_mmap_phys_addr_range (unsigned long phys_addr, unsigned long size) |
743 | { | 764 | { |
744 | if (efi_mem_attribute_range(phys_addr, size, EFI_MEMORY_WB)) | 765 | /* |
745 | return 1; | 766 | * MMIO regions are often missing from the EFI memory map. |
767 | * We must allow mmap of them for programs like X, so we | ||
768 | * currently can't do any useful validation. | ||
769 | */ | ||
770 | return 1; | ||
771 | } | ||
746 | 772 | ||
747 | if (efi_mem_attribute_range(phys_addr, size, EFI_MEMORY_UC)) | 773 | pgprot_t |
748 | return 1; | 774 | phys_mem_access_prot(struct file *file, unsigned long pfn, unsigned long size, |
775 | pgprot_t vma_prot) | ||
776 | { | ||
777 | unsigned long phys_addr = pfn << PAGE_SHIFT; | ||
778 | u64 attr; | ||
749 | 779 | ||
750 | return 0; | 780 | /* |
781 | * For /dev/mem mmap, we use user mappings, but if the region is | ||
782 | * in kern_memmap (and hence may be covered by a kernel mapping), | ||
783 | * we must use the same attribute as the kernel mapping. | ||
784 | */ | ||
785 | attr = kern_mem_attribute(phys_addr, size); | ||
786 | if (attr & EFI_MEMORY_WB) | ||
787 | return pgprot_cacheable(vma_prot); | ||
788 | else if (attr & EFI_MEMORY_UC) | ||
789 | return pgprot_noncached(vma_prot); | ||
790 | |||
791 | /* | ||
792 | * Some chipsets don't support UC access to memory. If | ||
793 | * WB is supported, we prefer that. | ||
794 | */ | ||
795 | if (efi_mem_attribute(phys_addr, size) & EFI_MEMORY_WB) | ||
796 | return pgprot_cacheable(vma_prot); | ||
797 | |||
798 | return pgprot_noncached(vma_prot); | ||
751 | } | 799 | } |
752 | 800 | ||
753 | int __init | 801 | int __init |
diff --git a/arch/ia64/mm/ioremap.c b/arch/ia64/mm/ioremap.c index 643ccc6960ce..07bd02b6c372 100644 --- a/arch/ia64/mm/ioremap.c +++ b/arch/ia64/mm/ioremap.c | |||
@@ -11,6 +11,7 @@ | |||
11 | #include <linux/module.h> | 11 | #include <linux/module.h> |
12 | #include <linux/efi.h> | 12 | #include <linux/efi.h> |
13 | #include <asm/io.h> | 13 | #include <asm/io.h> |
14 | #include <asm/meminit.h> | ||
14 | 15 | ||
15 | static inline void __iomem * | 16 | static inline void __iomem * |
16 | __ioremap (unsigned long offset, unsigned long size) | 17 | __ioremap (unsigned long offset, unsigned long size) |
@@ -21,16 +22,29 @@ __ioremap (unsigned long offset, unsigned long size) | |||
21 | void __iomem * | 22 | void __iomem * |
22 | ioremap (unsigned long offset, unsigned long size) | 23 | ioremap (unsigned long offset, unsigned long size) |
23 | { | 24 | { |
24 | if (efi_mem_attribute_range(offset, size, EFI_MEMORY_WB)) | 25 | u64 attr; |
25 | return phys_to_virt(offset); | 26 | unsigned long gran_base, gran_size; |
26 | 27 | ||
27 | if (efi_mem_attribute_range(offset, size, EFI_MEMORY_UC)) | 28 | /* |
29 | * For things in kern_memmap, we must use the same attribute | ||
30 | * as the rest of the kernel. For more details, see | ||
31 | * Documentation/ia64/aliasing.txt. | ||
32 | */ | ||
33 | attr = kern_mem_attribute(offset, size); | ||
34 | if (attr & EFI_MEMORY_WB) | ||
35 | return phys_to_virt(offset); | ||
36 | else if (attr & EFI_MEMORY_UC) | ||
28 | return __ioremap(offset, size); | 37 | return __ioremap(offset, size); |
29 | 38 | ||
30 | /* | 39 | /* |
31 | * Someday this should check ACPI resources so we | 40 | * Some chipsets don't support UC access to memory. If |
32 | * can do the right thing for hot-plugged regions. | 41 | * WB is supported for the whole granule, we prefer that. |
33 | */ | 42 | */ |
43 | gran_base = GRANULEROUNDDOWN(offset); | ||
44 | gran_size = GRANULEROUNDUP(offset + size) - gran_base; | ||
45 | if (efi_mem_attribute(gran_base, gran_size) & EFI_MEMORY_WB) | ||
46 | return phys_to_virt(offset); | ||
47 | |||
34 | return __ioremap(offset, size); | 48 | return __ioremap(offset, size); |
35 | } | 49 | } |
36 | EXPORT_SYMBOL(ioremap); | 50 | EXPORT_SYMBOL(ioremap); |
@@ -38,6 +52,9 @@ EXPORT_SYMBOL(ioremap); | |||
38 | void __iomem * | 52 | void __iomem * |
39 | ioremap_nocache (unsigned long offset, unsigned long size) | 53 | ioremap_nocache (unsigned long offset, unsigned long size) |
40 | { | 54 | { |
55 | if (kern_mem_attribute(offset, size) & EFI_MEMORY_WB) | ||
56 | return 0; | ||
57 | |||
41 | return __ioremap(offset, size); | 58 | return __ioremap(offset, size); |
42 | } | 59 | } |
43 | EXPORT_SYMBOL(ioremap_nocache); | 60 | EXPORT_SYMBOL(ioremap_nocache); |
diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c index ab829a22f8a4..30d148f34042 100644 --- a/arch/ia64/pci/pci.c +++ b/arch/ia64/pci/pci.c | |||
@@ -645,18 +645,31 @@ char *ia64_pci_get_legacy_mem(struct pci_bus *bus) | |||
645 | int | 645 | int |
646 | pci_mmap_legacy_page_range(struct pci_bus *bus, struct vm_area_struct *vma) | 646 | pci_mmap_legacy_page_range(struct pci_bus *bus, struct vm_area_struct *vma) |
647 | { | 647 | { |
648 | unsigned long size = vma->vm_end - vma->vm_start; | ||
649 | pgprot_t prot; | ||
648 | char *addr; | 650 | char *addr; |
649 | 651 | ||
652 | /* | ||
653 | * Avoid attribute aliasing. See Documentation/ia64/aliasing.txt | ||
654 | * for more details. | ||
655 | */ | ||
656 | if (!valid_mmap_phys_addr_range(vma->vm_pgoff << PAGE_SHIFT, size)) | ||
657 | return -EINVAL; | ||
658 | prot = phys_mem_access_prot(NULL, vma->vm_pgoff, size, | ||
659 | vma->vm_page_prot); | ||
660 | if (pgprot_val(prot) != pgprot_val(pgprot_noncached(vma->vm_page_prot))) | ||
661 | return -EINVAL; | ||
662 | |||
650 | addr = pci_get_legacy_mem(bus); | 663 | addr = pci_get_legacy_mem(bus); |
651 | if (IS_ERR(addr)) | 664 | if (IS_ERR(addr)) |
652 | return PTR_ERR(addr); | 665 | return PTR_ERR(addr); |
653 | 666 | ||
654 | vma->vm_pgoff += (unsigned long)addr >> PAGE_SHIFT; | 667 | vma->vm_pgoff += (unsigned long)addr >> PAGE_SHIFT; |
655 | vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); | 668 | vma->vm_page_prot = prot; |
656 | vma->vm_flags |= (VM_SHM | VM_RESERVED | VM_IO); | 669 | vma->vm_flags |= (VM_SHM | VM_RESERVED | VM_IO); |
657 | 670 | ||
658 | if (remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, | 671 | if (remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, |
659 | vma->vm_end - vma->vm_start, vma->vm_page_prot)) | 672 | size, vma->vm_page_prot)) |
660 | return -EAGAIN; | 673 | return -EAGAIN; |
661 | 674 | ||
662 | return 0; | 675 | return 0; |