diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2013-09-04 21:15:06 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-09-04 21:15:06 -0400 |
commit | ae7a835cc546fc67df90edaaa0c48ae2b22a29fe (patch) | |
tree | b1235437fde066ab0f272f164d75dc1b98a244cf /arch/s390/mm | |
parent | cf39c8e5352b4fb9efedfe7e9acb566a85ed847c (diff) | |
parent | 6b9e4fa07443f5baf5bbd7ab043abd6976f8d7bc (diff) |
Merge branch 'next' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM updates from Gleb Natapov:
"The highlights of the release are nested EPT and pv-ticketlocks
support (hypervisor part, guest part, which is most of the code, goes
through tip tree). Apart of that there are many fixes for all arches"
Fix up semantic conflicts as discussed in the pull request thread..
* 'next' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (88 commits)
ARM: KVM: Add newlines to panic strings
ARM: KVM: Work around older compiler bug
ARM: KVM: Simplify tracepoint text
ARM: KVM: Fix kvm_set_pte assignment
ARM: KVM: vgic: Bump VGIC_NR_IRQS to 256
ARM: KVM: Bugfix: vgic_bytemap_get_reg per cpu regs
ARM: KVM: vgic: fix GICD_ICFGRn access
ARM: KVM: vgic: simplify vgic_get_target_reg
KVM: MMU: remove unused parameter
KVM: PPC: Book3S PR: Rework kvmppc_mmu_book3s_64_xlate()
KVM: PPC: Book3S PR: Make instruction fetch fallback work for system calls
KVM: PPC: Book3S PR: Don't corrupt guest state when kernel uses VMX
KVM: x86: update masterclock when kvmclock_offset is calculated (v2)
KVM: PPC: Book3S: Fix compile error in XICS emulation
KVM: PPC: Book3S PR: return appropriate error when allocation fails
arch: powerpc: kvm: add signed type cast for comparation
KVM: x86: add comments where MMIO does not return to the emulator
KVM: vmx: count exits to userspace during invalid guest emulation
KVM: rename __kvm_io_bus_sort_cmp to kvm_io_bus_cmp
kvm: optimize away THP checks in kvm_is_mmio_pfn()
...
Diffstat (limited to 'arch/s390/mm')
-rw-r--r-- | arch/s390/mm/pgtable.c | 183 |
1 file changed, 118 insertions(+), 65 deletions(-)
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 6d16132d0850..bf7c0dc64a76 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c | |||
@@ -335,7 +335,7 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from, | |||
335 | 335 | ||
336 | if ((from | to | len) & (PMD_SIZE - 1)) | 336 | if ((from | to | len) & (PMD_SIZE - 1)) |
337 | return -EINVAL; | 337 | return -EINVAL; |
338 | if (len == 0 || from + len > PGDIR_SIZE || | 338 | if (len == 0 || from + len > TASK_MAX_SIZE || |
339 | from + len < from || to + len < to) | 339 | from + len < from || to + len < to) |
340 | return -EINVAL; | 340 | return -EINVAL; |
341 | 341 | ||
@@ -732,6 +732,11 @@ void gmap_do_ipte_notify(struct mm_struct *mm, unsigned long addr, pte_t *pte) | |||
732 | spin_unlock(&gmap_notifier_lock); | 732 | spin_unlock(&gmap_notifier_lock); |
733 | } | 733 | } |
734 | 734 | ||
735 | static inline int page_table_with_pgste(struct page *page) | ||
736 | { | ||
737 | return atomic_read(&page->_mapcount) == 0; | ||
738 | } | ||
739 | |||
735 | static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm, | 740 | static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm, |
736 | unsigned long vmaddr) | 741 | unsigned long vmaddr) |
737 | { | 742 | { |
@@ -751,7 +756,7 @@ static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm, | |||
751 | mp->vmaddr = vmaddr & PMD_MASK; | 756 | mp->vmaddr = vmaddr & PMD_MASK; |
752 | INIT_LIST_HEAD(&mp->mapper); | 757 | INIT_LIST_HEAD(&mp->mapper); |
753 | page->index = (unsigned long) mp; | 758 | page->index = (unsigned long) mp; |
754 | atomic_set(&page->_mapcount, 3); | 759 | atomic_set(&page->_mapcount, 0); |
755 | table = (unsigned long *) page_to_phys(page); | 760 | table = (unsigned long *) page_to_phys(page); |
756 | clear_table(table, _PAGE_INVALID, PAGE_SIZE/2); | 761 | clear_table(table, _PAGE_INVALID, PAGE_SIZE/2); |
757 | clear_table(table + PTRS_PER_PTE, PGSTE_HR_BIT | PGSTE_HC_BIT, | 762 | clear_table(table + PTRS_PER_PTE, PGSTE_HR_BIT | PGSTE_HC_BIT, |
@@ -818,6 +823,11 @@ EXPORT_SYMBOL(set_guest_storage_key); | |||
818 | 823 | ||
819 | #else /* CONFIG_PGSTE */ | 824 | #else /* CONFIG_PGSTE */ |
820 | 825 | ||
826 | static inline int page_table_with_pgste(struct page *page) | ||
827 | { | ||
828 | return 0; | ||
829 | } | ||
830 | |||
821 | static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm, | 831 | static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm, |
822 | unsigned long vmaddr) | 832 | unsigned long vmaddr) |
823 | { | 833 | { |
@@ -894,12 +904,12 @@ void page_table_free(struct mm_struct *mm, unsigned long *table) | |||
894 | struct page *page; | 904 | struct page *page; |
895 | unsigned int bit, mask; | 905 | unsigned int bit, mask; |
896 | 906 | ||
897 | if (mm_has_pgste(mm)) { | 907 | page = pfn_to_page(__pa(table) >> PAGE_SHIFT); |
908 | if (page_table_with_pgste(page)) { | ||
898 | gmap_disconnect_pgtable(mm, table); | 909 | gmap_disconnect_pgtable(mm, table); |
899 | return page_table_free_pgste(table); | 910 | return page_table_free_pgste(table); |
900 | } | 911 | } |
901 | /* Free 1K/2K page table fragment of a 4K page */ | 912 | /* Free 1K/2K page table fragment of a 4K page */ |
902 | page = pfn_to_page(__pa(table) >> PAGE_SHIFT); | ||
903 | bit = 1 << ((__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t))); | 913 | bit = 1 << ((__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t))); |
904 | spin_lock_bh(&mm->context.list_lock); | 914 | spin_lock_bh(&mm->context.list_lock); |
905 | if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK) | 915 | if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK) |
@@ -937,14 +947,14 @@ void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table) | |||
937 | unsigned int bit, mask; | 947 | unsigned int bit, mask; |
938 | 948 | ||
939 | mm = tlb->mm; | 949 | mm = tlb->mm; |
940 | if (mm_has_pgste(mm)) { | 950 | page = pfn_to_page(__pa(table) >> PAGE_SHIFT); |
951 | if (page_table_with_pgste(page)) { | ||
941 | gmap_disconnect_pgtable(mm, table); | 952 | gmap_disconnect_pgtable(mm, table); |
942 | table = (unsigned long *) (__pa(table) | FRAG_MASK); | 953 | table = (unsigned long *) (__pa(table) | FRAG_MASK); |
943 | tlb_remove_table(tlb, table); | 954 | tlb_remove_table(tlb, table); |
944 | return; | 955 | return; |
945 | } | 956 | } |
946 | bit = 1 << ((__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t))); | 957 | bit = 1 << ((__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t))); |
947 | page = pfn_to_page(__pa(table) >> PAGE_SHIFT); | ||
948 | spin_lock_bh(&mm->context.list_lock); | 958 | spin_lock_bh(&mm->context.list_lock); |
949 | if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK) | 959 | if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK) |
950 | list_del(&page->lru); | 960 | list_del(&page->lru); |
@@ -1030,36 +1040,120 @@ void tlb_remove_table(struct mmu_gather *tlb, void *table) | |||
1030 | } | 1040 | } |
1031 | 1041 | ||
1032 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | 1042 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE |
1033 | void thp_split_vma(struct vm_area_struct *vma) | 1043 | static inline void thp_split_vma(struct vm_area_struct *vma) |
1034 | { | 1044 | { |
1035 | unsigned long addr; | 1045 | unsigned long addr; |
1036 | struct page *page; | ||
1037 | 1046 | ||
1038 | for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE) { | 1047 | for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE) |
1039 | page = follow_page(vma, addr, FOLL_SPLIT); | 1048 | follow_page(vma, addr, FOLL_SPLIT); |
1040 | } | ||
1041 | } | 1049 | } |
1042 | 1050 | ||
1043 | void thp_split_mm(struct mm_struct *mm) | 1051 | static inline void thp_split_mm(struct mm_struct *mm) |
1044 | { | 1052 | { |
1045 | struct vm_area_struct *vma = mm->mmap; | 1053 | struct vm_area_struct *vma; |
1046 | 1054 | ||
1047 | while (vma != NULL) { | 1055 | for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) { |
1048 | thp_split_vma(vma); | 1056 | thp_split_vma(vma); |
1049 | vma->vm_flags &= ~VM_HUGEPAGE; | 1057 | vma->vm_flags &= ~VM_HUGEPAGE; |
1050 | vma->vm_flags |= VM_NOHUGEPAGE; | 1058 | vma->vm_flags |= VM_NOHUGEPAGE; |
1051 | vma = vma->vm_next; | ||
1052 | } | 1059 | } |
1060 | mm->def_flags |= VM_NOHUGEPAGE; | ||
1061 | } | ||
1062 | #else | ||
1063 | static inline void thp_split_mm(struct mm_struct *mm) | ||
1064 | { | ||
1053 | } | 1065 | } |
1054 | #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ | 1066 | #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ |
1055 | 1067 | ||
1068 | static unsigned long page_table_realloc_pmd(struct mmu_gather *tlb, | ||
1069 | struct mm_struct *mm, pud_t *pud, | ||
1070 | unsigned long addr, unsigned long end) | ||
1071 | { | ||
1072 | unsigned long next, *table, *new; | ||
1073 | struct page *page; | ||
1074 | pmd_t *pmd; | ||
1075 | |||
1076 | pmd = pmd_offset(pud, addr); | ||
1077 | do { | ||
1078 | next = pmd_addr_end(addr, end); | ||
1079 | again: | ||
1080 | if (pmd_none_or_clear_bad(pmd)) | ||
1081 | continue; | ||
1082 | table = (unsigned long *) pmd_deref(*pmd); | ||
1083 | page = pfn_to_page(__pa(table) >> PAGE_SHIFT); | ||
1084 | if (page_table_with_pgste(page)) | ||
1085 | continue; | ||
1086 | /* Allocate new page table with pgstes */ | ||
1087 | new = page_table_alloc_pgste(mm, addr); | ||
1088 | if (!new) { | ||
1089 | mm->context.has_pgste = 0; | ||
1090 | continue; | ||
1091 | } | ||
1092 | spin_lock(&mm->page_table_lock); | ||
1093 | if (likely((unsigned long *) pmd_deref(*pmd) == table)) { | ||
1094 | /* Nuke pmd entry pointing to the "short" page table */ | ||
1095 | pmdp_flush_lazy(mm, addr, pmd); | ||
1096 | pmd_clear(pmd); | ||
1097 | /* Copy ptes from old table to new table */ | ||
1098 | memcpy(new, table, PAGE_SIZE/2); | ||
1099 | clear_table(table, _PAGE_INVALID, PAGE_SIZE/2); | ||
1100 | /* Establish new table */ | ||
1101 | pmd_populate(mm, pmd, (pte_t *) new); | ||
1102 | /* Free old table with rcu, there might be a walker! */ | ||
1103 | page_table_free_rcu(tlb, table); | ||
1104 | new = NULL; | ||
1105 | } | ||
1106 | spin_unlock(&mm->page_table_lock); | ||
1107 | if (new) { | ||
1108 | page_table_free_pgste(new); | ||
1109 | goto again; | ||
1110 | } | ||
1111 | } while (pmd++, addr = next, addr != end); | ||
1112 | |||
1113 | return addr; | ||
1114 | } | ||
1115 | |||
1116 | static unsigned long page_table_realloc_pud(struct mmu_gather *tlb, | ||
1117 | struct mm_struct *mm, pgd_t *pgd, | ||
1118 | unsigned long addr, unsigned long end) | ||
1119 | { | ||
1120 | unsigned long next; | ||
1121 | pud_t *pud; | ||
1122 | |||
1123 | pud = pud_offset(pgd, addr); | ||
1124 | do { | ||
1125 | next = pud_addr_end(addr, end); | ||
1126 | if (pud_none_or_clear_bad(pud)) | ||
1127 | continue; | ||
1128 | next = page_table_realloc_pmd(tlb, mm, pud, addr, next); | ||
1129 | } while (pud++, addr = next, addr != end); | ||
1130 | |||
1131 | return addr; | ||
1132 | } | ||
1133 | |||
1134 | static void page_table_realloc(struct mmu_gather *tlb, struct mm_struct *mm, | ||
1135 | unsigned long addr, unsigned long end) | ||
1136 | { | ||
1137 | unsigned long next; | ||
1138 | pgd_t *pgd; | ||
1139 | |||
1140 | pgd = pgd_offset(mm, addr); | ||
1141 | do { | ||
1142 | next = pgd_addr_end(addr, end); | ||
1143 | if (pgd_none_or_clear_bad(pgd)) | ||
1144 | continue; | ||
1145 | next = page_table_realloc_pud(tlb, mm, pgd, addr, next); | ||
1146 | } while (pgd++, addr = next, addr != end); | ||
1147 | } | ||
1148 | |||
1056 | /* | 1149 | /* |
1057 | * switch on pgstes for its userspace process (for kvm) | 1150 | * switch on pgstes for its userspace process (for kvm) |
1058 | */ | 1151 | */ |
1059 | int s390_enable_sie(void) | 1152 | int s390_enable_sie(void) |
1060 | { | 1153 | { |
1061 | struct task_struct *tsk = current; | 1154 | struct task_struct *tsk = current; |
1062 | struct mm_struct *mm, *old_mm; | 1155 | struct mm_struct *mm = tsk->mm; |
1156 | struct mmu_gather tlb; | ||
1063 | 1157 | ||
1064 | /* Do we have switched amode? If no, we cannot do sie */ | 1158 | /* Do we have switched amode? If no, we cannot do sie */ |
1065 | if (s390_user_mode == HOME_SPACE_MODE) | 1159 | if (s390_user_mode == HOME_SPACE_MODE) |
@@ -1069,57 +1163,16 @@ int s390_enable_sie(void) | |||
1069 | if (mm_has_pgste(tsk->mm)) | 1163 | if (mm_has_pgste(tsk->mm)) |
1070 | return 0; | 1164 | return 0; |
1071 | 1165 | ||
1072 | /* lets check if we are allowed to replace the mm */ | 1166 | down_write(&mm->mmap_sem); |
1073 | task_lock(tsk); | ||
1074 | if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 || | ||
1075 | #ifdef CONFIG_AIO | ||
1076 | !hlist_empty(&tsk->mm->ioctx_list) || | ||
1077 | #endif | ||
1078 | tsk->mm != tsk->active_mm) { | ||
1079 | task_unlock(tsk); | ||
1080 | return -EINVAL; | ||
1081 | } | ||
1082 | task_unlock(tsk); | ||
1083 | |||
1084 | /* we copy the mm and let dup_mm create the page tables with_pgstes */ | ||
1085 | tsk->mm->context.alloc_pgste = 1; | ||
1086 | /* make sure that both mms have a correct rss state */ | ||
1087 | sync_mm_rss(tsk->mm); | ||
1088 | mm = dup_mm(tsk); | ||
1089 | tsk->mm->context.alloc_pgste = 0; | ||
1090 | if (!mm) | ||
1091 | return -ENOMEM; | ||
1092 | |||
1093 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | ||
1094 | /* split thp mappings and disable thp for future mappings */ | 1167 | /* split thp mappings and disable thp for future mappings */ |
1095 | thp_split_mm(mm); | 1168 | thp_split_mm(mm); |
1096 | mm->def_flags |= VM_NOHUGEPAGE; | 1169 | /* Reallocate the page tables with pgstes */ |
1097 | #endif | 1170 | mm->context.has_pgste = 1; |
1098 | 1171 | tlb_gather_mmu(&tlb, mm, 0, TASK_SIZE); | |
1099 | /* Now lets check again if something happened */ | 1172 | page_table_realloc(&tlb, mm, 0, TASK_SIZE); |
1100 | task_lock(tsk); | 1173 | tlb_finish_mmu(&tlb, 0, TASK_SIZE); |
1101 | if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 || | 1174 | up_write(&mm->mmap_sem); |
1102 | #ifdef CONFIG_AIO | 1175 | return mm->context.has_pgste ? 0 : -ENOMEM; |
1103 | !hlist_empty(&tsk->mm->ioctx_list) || | ||
1104 | #endif | ||
1105 | tsk->mm != tsk->active_mm) { | ||
1106 | mmput(mm); | ||
1107 | task_unlock(tsk); | ||
1108 | return -EINVAL; | ||
1109 | } | ||
1110 | |||
1111 | /* ok, we are alone. No ptrace, no threads, etc. */ | ||
1112 | old_mm = tsk->mm; | ||
1113 | tsk->mm = tsk->active_mm = mm; | ||
1114 | preempt_disable(); | ||
1115 | update_mm(mm, tsk); | ||
1116 | atomic_inc(&mm->context.attach_count); | ||
1117 | atomic_dec(&old_mm->context.attach_count); | ||
1118 | cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm)); | ||
1119 | preempt_enable(); | ||
1120 | task_unlock(tsk); | ||
1121 | mmput(old_mm); | ||
1122 | return 0; | ||
1123 | } | 1176 | } |
1124 | EXPORT_SYMBOL_GPL(s390_enable_sie); | 1177 | EXPORT_SYMBOL_GPL(s390_enable_sie); |
1125 | 1178 | ||