author    Linus Torvalds <torvalds@linux-foundation.org>  2013-09-04 21:15:06 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>  2013-09-04 21:15:06 -0400
commit    ae7a835cc546fc67df90edaaa0c48ae2b22a29fe (patch)
tree      b1235437fde066ab0f272f164d75dc1b98a244cf /arch/s390/mm
parent    cf39c8e5352b4fb9efedfe7e9acb566a85ed847c (diff)
parent    6b9e4fa07443f5baf5bbd7ab043abd6976f8d7bc (diff)
Merge branch 'next' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM updates from Gleb Natapov:
 "The highlights of the release are nested EPT and pv-ticketlocks
  support (hypervisor part, guest part, which is most of the code, goes
  through tip tree). Apart of that there are many fixes for all arches."

Fix up semantic conflicts as discussed in the pull request thread.

* 'next' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (88 commits)
  ARM: KVM: Add newlines to panic strings
  ARM: KVM: Work around older compiler bug
  ARM: KVM: Simplify tracepoint text
  ARM: KVM: Fix kvm_set_pte assignment
  ARM: KVM: vgic: Bump VGIC_NR_IRQS to 256
  ARM: KVM: Bugfix: vgic_bytemap_get_reg per cpu regs
  ARM: KVM: vgic: fix GICD_ICFGRn access
  ARM: KVM: vgic: simplify vgic_get_target_reg
  KVM: MMU: remove unused parameter
  KVM: PPC: Book3S PR: Rework kvmppc_mmu_book3s_64_xlate()
  KVM: PPC: Book3S PR: Make instruction fetch fallback work for system calls
  KVM: PPC: Book3S PR: Don't corrupt guest state when kernel uses VMX
  KVM: x86: update masterclock when kvmclock_offset is calculated (v2)
  KVM: PPC: Book3S: Fix compile error in XICS emulation
  KVM: PPC: Book3S PR: return appropriate error when allocation fails
  arch: powerpc: kvm: add signed type cast for comparation
  KVM: x86: add comments where MMIO does not return to the emulator
  KVM: vmx: count exits to userspace during invalid guest emulation
  KVM: rename __kvm_io_bus_sort_cmp to kvm_io_bus_cmp
  kvm: optimize away THP checks in kvm_is_mmio_pfn()
  ...
Diffstat (limited to 'arch/s390/mm')
-rw-r--r--	arch/s390/mm/pgtable.c	183
1 file changed, 118 insertions(+), 65 deletions(-)
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 6d16132d0850..bf7c0dc64a76 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -335,7 +335,7 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from,
 
 	if ((from | to | len) & (PMD_SIZE - 1))
 		return -EINVAL;
-	if (len == 0 || from + len > PGDIR_SIZE ||
+	if (len == 0 || from + len > TASK_MAX_SIZE ||
 	    from + len < from || to + len < to)
 		return -EINVAL;
 
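Note: the new upper bound pairs with the existing wraparound tests; `from + len < from` can only hold if the addition overflowed. A minimal stand-alone sketch of the same overflow-safe range validation, with invented names and values standing in for PMD_SIZE and TASK_MAX_SIZE:

#include <stdint.h>

/* Invented stand-ins: LIMIT plays the role of TASK_MAX_SIZE,
 * ALIGN_MASK the role of PMD_SIZE - 1. */
#define LIMIT		(1UL << 41)
#define ALIGN_MASK	((1UL << 20) - 1)

static int check_range(uint64_t from, uint64_t to, uint64_t len)
{
	/* all three values must be segment aligned */
	if ((from | to | len) & ALIGN_MASK)
		return -1;
	/* reject empty ranges, ranges beyond the limit, and overflow:
	 * from + len < from is true only if the addition wrapped */
	if (len == 0 || from + len > LIMIT ||
	    from + len < from || to + len < to)
		return -1;
	return 0;
}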
@@ -732,6 +732,11 @@ void gmap_do_ipte_notify(struct mm_struct *mm, unsigned long addr, pte_t *pte)
 	spin_unlock(&gmap_notifier_lock);
 }
 
+static inline int page_table_with_pgste(struct page *page)
+{
+	return atomic_read(&page->_mapcount) == 0;
+}
+
 static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
 						    unsigned long vmaddr)
 {
@@ -751,7 +756,7 @@ static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
 	mp->vmaddr = vmaddr & PMD_MASK;
 	INIT_LIST_HEAD(&mp->mapper);
 	page->index = (unsigned long) mp;
-	atomic_set(&page->_mapcount, 3);
+	atomic_set(&page->_mapcount, 0);
 	table = (unsigned long *) page_to_phys(page);
 	clear_table(table, _PAGE_INVALID, PAGE_SIZE/2);
 	clear_table(table + PTRS_PER_PTE, PGSTE_HR_BIT | PGSTE_HC_BIT,
@@ -818,6 +823,11 @@ EXPORT_SYMBOL(set_guest_storage_key);
 
 #else /* CONFIG_PGSTE */
 
+static inline int page_table_with_pgste(struct page *page)
+{
+	return 0;
+}
+
 static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
 						    unsigned long vmaddr)
 {
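Note: on s390 a 4K page normally carries two 2K page-table fragments, with the in-use fragments tracked in bits of page->_mapcount; a table with pgstes occupies the full 4K page. Setting _mapcount to 0 at allocation (in place of the old magic value 3) gives page_table_with_pgste() a per-page marker that no fragmented table page can carry while allocated. A toy model of the check, with invented fragment names:

#include <assert.h>

/* Toy model with invented names: a 4K page holds two 2K page-table
 * fragments, tracked as bits of a per-page counter.  A pgste table
 * consumes the whole page, so the patch marks it with 0 -- a value a
 * fragmented page cannot have while any of its tables is in use. */
enum { FRAG_LOW = 1, FRAG_HIGH = 2 };

static int page_table_with_pgste(int mapcount)
{
	return mapcount == 0;		/* full-page pgste table marker */
}

int main(void)
{
	assert(page_table_with_pgste(0));		/* pgste table  */
	assert(!page_table_with_pgste(FRAG_LOW));	/* one fragment */
	assert(!page_table_with_pgste(FRAG_LOW | FRAG_HIGH));
	return 0;
}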
@@ -894,12 +904,12 @@ void page_table_free(struct mm_struct *mm, unsigned long *table)
 	struct page *page;
 	unsigned int bit, mask;
 
-	if (mm_has_pgste(mm)) {
+	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
+	if (page_table_with_pgste(page)) {
 		gmap_disconnect_pgtable(mm, table);
 		return page_table_free_pgste(table);
 	}
 	/* Free 1K/2K page table fragment of a 4K page */
-	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
 	bit = 1 << ((__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t)));
 	spin_lock_bh(&mm->context.list_lock);
 	if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
@@ -937,14 +947,14 @@ void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table)
 	unsigned int bit, mask;
 
 	mm = tlb->mm;
-	if (mm_has_pgste(mm)) {
+	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
+	if (page_table_with_pgste(page)) {
 		gmap_disconnect_pgtable(mm, table);
 		table = (unsigned long *) (__pa(table) | FRAG_MASK);
 		tlb_remove_table(tlb, table);
 		return;
 	}
 	bit = 1 << ((__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t)));
-	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
 	spin_lock_bh(&mm->context.list_lock);
 	if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
 		list_del(&page->lru);
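Note: both free paths now look up the owning struct page first and branch on the page itself instead of mm_has_pgste(mm): once tables are reallocated in place (see the realloc walk below), a single mm can transiently hold both 2K fragment tables and full pgste tables. The fragment bit is derived from the table's offset within its 4K page; a stand-alone sketch of that computation, with sizes hard-coded for illustration:

#include <stdio.h>
#include <stdint.h>

#define PAGE_MASK	(~0xfffUL)	/* assuming 4K pages */
#define TABLE_SIZE	0x800UL		/* one 2K table: PTRS_PER_PTE * sizeof(pte_t) */

/* Which fragment bit does the table at physical address pa occupy? */
static unsigned int frag_bit(uint64_t pa)
{
	return 1u << ((pa & ~PAGE_MASK) / TABLE_SIZE);
}

int main(void)
{
	printf("%u\n", frag_bit(0x12340000));	/* low 2K half  -> bit 1 */
	printf("%u\n", frag_bit(0x12340800));	/* high 2K half -> bit 2 */
	return 0;
}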
@@ -1030,36 +1040,120 @@ void tlb_remove_table(struct mmu_gather *tlb, void *table)
 }
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
-void thp_split_vma(struct vm_area_struct *vma)
+static inline void thp_split_vma(struct vm_area_struct *vma)
 {
 	unsigned long addr;
-	struct page *page;
 
-	for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE) {
-		page = follow_page(vma, addr, FOLL_SPLIT);
-	}
+	for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE)
+		follow_page(vma, addr, FOLL_SPLIT);
 }
 
-void thp_split_mm(struct mm_struct *mm)
+static inline void thp_split_mm(struct mm_struct *mm)
 {
-	struct vm_area_struct *vma = mm->mmap;
+	struct vm_area_struct *vma;
 
-	while (vma != NULL) {
+	for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) {
 		thp_split_vma(vma);
 		vma->vm_flags &= ~VM_HUGEPAGE;
 		vma->vm_flags |= VM_NOHUGEPAGE;
-		vma = vma->vm_next;
 	}
+	mm->def_flags |= VM_NOHUGEPAGE;
+}
+#else
+static inline void thp_split_mm(struct mm_struct *mm)
+{
 }
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
+static unsigned long page_table_realloc_pmd(struct mmu_gather *tlb,
+				struct mm_struct *mm, pud_t *pud,
+				unsigned long addr, unsigned long end)
+{
+	unsigned long next, *table, *new;
+	struct page *page;
+	pmd_t *pmd;
+
+	pmd = pmd_offset(pud, addr);
+	do {
+		next = pmd_addr_end(addr, end);
+again:
+		if (pmd_none_or_clear_bad(pmd))
+			continue;
+		table = (unsigned long *) pmd_deref(*pmd);
+		page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
+		if (page_table_with_pgste(page))
+			continue;
+		/* Allocate new page table with pgstes */
+		new = page_table_alloc_pgste(mm, addr);
+		if (!new) {
+			mm->context.has_pgste = 0;
+			continue;
+		}
+		spin_lock(&mm->page_table_lock);
+		if (likely((unsigned long *) pmd_deref(*pmd) == table)) {
+			/* Nuke pmd entry pointing to the "short" page table */
+			pmdp_flush_lazy(mm, addr, pmd);
+			pmd_clear(pmd);
+			/* Copy ptes from old table to new table */
+			memcpy(new, table, PAGE_SIZE/2);
+			clear_table(table, _PAGE_INVALID, PAGE_SIZE/2);
+			/* Establish new table */
+			pmd_populate(mm, pmd, (pte_t *) new);
+			/* Free old table with rcu, there might be a walker! */
+			page_table_free_rcu(tlb, table);
+			new = NULL;
+		}
+		spin_unlock(&mm->page_table_lock);
+		if (new) {
+			page_table_free_pgste(new);
+			goto again;
+		}
+	} while (pmd++, addr = next, addr != end);
+
+	return addr;
+}
+
+static unsigned long page_table_realloc_pud(struct mmu_gather *tlb,
+				struct mm_struct *mm, pgd_t *pgd,
+				unsigned long addr, unsigned long end)
+{
+	unsigned long next;
+	pud_t *pud;
+
+	pud = pud_offset(pgd, addr);
+	do {
+		next = pud_addr_end(addr, end);
+		if (pud_none_or_clear_bad(pud))
+			continue;
+		next = page_table_realloc_pmd(tlb, mm, pud, addr, next);
+	} while (pud++, addr = next, addr != end);
+
+	return addr;
+}
+
+static void page_table_realloc(struct mmu_gather *tlb, struct mm_struct *mm,
+			       unsigned long addr, unsigned long end)
+{
+	unsigned long next;
+	pgd_t *pgd;
+
+	pgd = pgd_offset(mm, addr);
+	do {
+		next = pgd_addr_end(addr, end);
+		if (pgd_none_or_clear_bad(pgd))
+			continue;
+		next = page_table_realloc_pud(tlb, mm, pgd, addr, next);
+	} while (pgd++, addr = next, addr != end);
+}
+
 /*
  * switch on pgstes for its userspace process (for kvm)
  */
 int s390_enable_sie(void)
 {
 	struct task_struct *tsk = current;
-	struct mm_struct *mm, *old_mm;
+	struct mm_struct *mm = tsk->mm;
+	struct mmu_gather tlb;
 
 	/* Do we have switched amode? If no, we cannot do sie */
 	if (s390_user_mode == HOME_SPACE_MODE)
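Note: the pmd walk above follows the allocate-outside-the-lock pattern: page_table_alloc_pgste() may sleep, so the replacement table is allocated first, the pmd is re-checked under page_table_lock, and if another thread raced in, the unused table is freed and the entry retried via `goto again`; the old table is released through page_table_free_rcu() because lockless walkers may still be traversing it. A generic user-space sketch of the same pattern, with all names and types invented:

#include <pthread.h>
#include <stdlib.h>
#include <string.h>

struct slot { void *table; };		/* invented: stands in for a pmd */

static void replace_table(struct slot *s, pthread_mutex_t *lock)
{
	for (;;) {
		void *old = s->table;		/* snapshot outside the lock */
		void *new = malloc(4096);	/* may sleep, so not under the lock */
		if (!new)
			return;			/* caller records the failure */
		pthread_mutex_lock(lock);
		if (s->table == old) {		/* nobody raced with us */
			memcpy(new, old, 2048);	/* copy the "short" table */
			s->table = new;
			pthread_mutex_unlock(lock);
			free(old);		/* the kernel defers this via RCU */
			return;
		}
		pthread_mutex_unlock(lock);
		free(new);			/* lost the race: drop it and retry */
	}
}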
@@ -1069,57 +1163,16 @@ int s390_enable_sie(void)
 	if (mm_has_pgste(tsk->mm))
 		return 0;
 
-	/* lets check if we are allowed to replace the mm */
-	task_lock(tsk);
-	if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
-#ifdef CONFIG_AIO
-	    !hlist_empty(&tsk->mm->ioctx_list) ||
-#endif
-	    tsk->mm != tsk->active_mm) {
-		task_unlock(tsk);
-		return -EINVAL;
-	}
-	task_unlock(tsk);
-
-	/* we copy the mm and let dup_mm create the page tables with_pgstes */
-	tsk->mm->context.alloc_pgste = 1;
-	/* make sure that both mms have a correct rss state */
-	sync_mm_rss(tsk->mm);
-	mm = dup_mm(tsk);
-	tsk->mm->context.alloc_pgste = 0;
-	if (!mm)
-		return -ENOMEM;
-
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+	down_write(&mm->mmap_sem);
 	/* split thp mappings and disable thp for future mappings */
 	thp_split_mm(mm);
-	mm->def_flags |= VM_NOHUGEPAGE;
-#endif
-
-	/* Now lets check again if something happened */
-	task_lock(tsk);
-	if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
-#ifdef CONFIG_AIO
-	    !hlist_empty(&tsk->mm->ioctx_list) ||
-#endif
-	    tsk->mm != tsk->active_mm) {
-		mmput(mm);
-		task_unlock(tsk);
-		return -EINVAL;
-	}
-
-	/* ok, we are alone. No ptrace, no threads, etc. */
-	old_mm = tsk->mm;
-	tsk->mm = tsk->active_mm = mm;
-	preempt_disable();
-	update_mm(mm, tsk);
-	atomic_inc(&mm->context.attach_count);
-	atomic_dec(&old_mm->context.attach_count);
-	cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
-	preempt_enable();
-	task_unlock(tsk);
-	mmput(old_mm);
-	return 0;
+	/* Reallocate the page tables with pgstes */
+	mm->context.has_pgste = 1;
+	tlb_gather_mmu(&tlb, mm, 0, TASK_SIZE);
+	page_table_realloc(&tlb, mm, 0, TASK_SIZE);
+	tlb_finish_mmu(&tlb, 0, TASK_SIZE);
+	up_write(&mm->mmap_sem);
+	return mm->context.has_pgste ? 0 : -ENOMEM;
 }
 EXPORT_SYMBOL_GPL(s390_enable_sie);
 
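Note: the rewritten s390_enable_sie() converts the existing address space in place under a write-held mmap_sem instead of swapping in a duplicated mm, and it reports allocation failure through mm->context.has_pgste, which page_table_realloc_pmd() clears when it cannot allocate a pgste table. A condensed user-space analog of that control flow, assuming pthread primitives in place of the kernel's locking:

#include <errno.h>
#include <pthread.h>

struct ctx {				/* invented stand-in for mm_struct */
	pthread_rwlock_t lock;		/* plays the role of mmap_sem */
	int has_pgste;			/* optimistic success flag */
};

/* The walk clears the flag on allocation failure instead of aborting,
 * so entries converted before the failure remain valid. */
static void realloc_tables(struct ctx *c)
{
	/* ... c->has_pgste = 0 on any failed allocation ... */
	(void)c;
}

static int enable_sie(struct ctx *c)
{
	pthread_rwlock_wrlock(&c->lock);	/* like down_write(&mm->mmap_sem) */
	c->has_pgste = 1;
	realloc_tables(c);			/* like page_table_realloc() */
	pthread_rwlock_unlock(&c->lock);
	return c->has_pgste ? 0 : -ENOMEM;
}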