about | summary | refs | log | tree | commit | diff | stats
path: root/arch/s390/mm
diff options
context:
space:
mode:
author: Martin Schwidefsky <schwidefsky@de.ibm.com> 2013-07-26 09:04:02 -0400
committer: Paolo Bonzini <pbonzini@redhat.com> 2013-07-29 03:03:09 -0400
commit: 3eabaee998c787e7e1565574821652548f7fc003 (patch)
tree: 8e36fdfda46ec4c0a5b2a419a39fa2f5c1ba0f8e /arch/s390/mm
parent: 663f4c61b8036fd3a80debbe00b58d198ae63e76 (diff)
KVM: s390: allow sie enablement for multi-threaded programs
Improve the code to upgrade the standard 2K page tables to 4K page tables with PGSTEs to allow the operation to happen when the program is already multi-threaded.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Diffstat (limited to 'arch/s390/mm')
-rw-r--r-- arch/s390/mm/pgtable.c | 181
1 files changed, 117 insertions, 64 deletions
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index a8154a1a2c94..6d332487f363 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -731,6 +731,11 @@ void gmap_do_ipte_notify(struct mm_struct *mm, unsigned long addr, pte_t *pte)
731 spin_unlock(&gmap_notifier_lock); 731 spin_unlock(&gmap_notifier_lock);
732} 732}
733 733
734static inline int page_table_with_pgste(struct page *page)
735{
736 return atomic_read(&page->_mapcount) == 0;
737}
738
734static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm, 739static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
735 unsigned long vmaddr) 740 unsigned long vmaddr)
736{ 741{
@@ -750,7 +755,7 @@ static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
750 mp->vmaddr = vmaddr & PMD_MASK; 755 mp->vmaddr = vmaddr & PMD_MASK;
751 INIT_LIST_HEAD(&mp->mapper); 756 INIT_LIST_HEAD(&mp->mapper);
752 page->index = (unsigned long) mp; 757 page->index = (unsigned long) mp;
753 atomic_set(&page->_mapcount, 3); 758 atomic_set(&page->_mapcount, 0);
754 table = (unsigned long *) page_to_phys(page); 759 table = (unsigned long *) page_to_phys(page);
755 clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2); 760 clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2);
756 clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2); 761 clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2);
@@ -821,6 +826,11 @@ EXPORT_SYMBOL(set_guest_storage_key);
821 826
822#else /* CONFIG_PGSTE */ 827#else /* CONFIG_PGSTE */
823 828
/*
 * !CONFIG_PGSTE variant: no page table is ever reported as carrying
 * pgstes in this configuration, so the answer is always 0 regardless
 * of @page.
 */
static inline int page_table_with_pgste(struct page *page)
{
	return 0;
}
833
824static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm, 834static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
825 unsigned long vmaddr) 835 unsigned long vmaddr)
826{ 836{
@@ -897,12 +907,12 @@ void page_table_free(struct mm_struct *mm, unsigned long *table)
897 struct page *page; 907 struct page *page;
898 unsigned int bit, mask; 908 unsigned int bit, mask;
899 909
900 if (mm_has_pgste(mm)) { 910 page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
911 if (page_table_with_pgste(page)) {
901 gmap_disconnect_pgtable(mm, table); 912 gmap_disconnect_pgtable(mm, table);
902 return page_table_free_pgste(table); 913 return page_table_free_pgste(table);
903 } 914 }
904 /* Free 1K/2K page table fragment of a 4K page */ 915 /* Free 1K/2K page table fragment of a 4K page */
905 page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
906 bit = 1 << ((__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t))); 916 bit = 1 << ((__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t)));
907 spin_lock_bh(&mm->context.list_lock); 917 spin_lock_bh(&mm->context.list_lock);
908 if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK) 918 if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
@@ -940,14 +950,14 @@ void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table)
940 unsigned int bit, mask; 950 unsigned int bit, mask;
941 951
942 mm = tlb->mm; 952 mm = tlb->mm;
943 if (mm_has_pgste(mm)) { 953 page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
954 if (page_table_with_pgste(page)) {
944 gmap_disconnect_pgtable(mm, table); 955 gmap_disconnect_pgtable(mm, table);
945 table = (unsigned long *) (__pa(table) | FRAG_MASK); 956 table = (unsigned long *) (__pa(table) | FRAG_MASK);
946 tlb_remove_table(tlb, table); 957 tlb_remove_table(tlb, table);
947 return; 958 return;
948 } 959 }
949 bit = 1 << ((__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t))); 960 bit = 1 << ((__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t)));
950 page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
951 spin_lock_bh(&mm->context.list_lock); 961 spin_lock_bh(&mm->context.list_lock);
952 if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK) 962 if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
953 list_del(&page->lru); 963 list_del(&page->lru);
@@ -1033,36 +1043,120 @@ void tlb_remove_table(struct mmu_gather *tlb, void *table)
1033} 1043}
1034 1044
1035#ifdef CONFIG_TRANSPARENT_HUGEPAGE 1045#ifdef CONFIG_TRANSPARENT_HUGEPAGE
1036void thp_split_vma(struct vm_area_struct *vma) 1046static inline void thp_split_vma(struct vm_area_struct *vma)
1037{ 1047{
1038 unsigned long addr; 1048 unsigned long addr;
1039 struct page *page;
1040 1049
1041 for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE) { 1050 for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE)
1042 page = follow_page(vma, addr, FOLL_SPLIT); 1051 follow_page(vma, addr, FOLL_SPLIT);
1043 }
1044} 1052}
1045 1053
1046void thp_split_mm(struct mm_struct *mm) 1054static inline void thp_split_mm(struct mm_struct *mm)
1047{ 1055{
1048 struct vm_area_struct *vma = mm->mmap; 1056 struct vm_area_struct *vma;
1049 1057
1050 while (vma != NULL) { 1058 for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) {
1051 thp_split_vma(vma); 1059 thp_split_vma(vma);
1052 vma->vm_flags &= ~VM_HUGEPAGE; 1060 vma->vm_flags &= ~VM_HUGEPAGE;
1053 vma->vm_flags |= VM_NOHUGEPAGE; 1061 vma->vm_flags |= VM_NOHUGEPAGE;
1054 vma = vma->vm_next;
1055 } 1062 }
1063 mm->def_flags |= VM_NOHUGEPAGE;
1064}
1065#else
1066static inline void thp_split_mm(struct mm_struct *mm)
1067{
1056} 1068}
1057#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ 1069#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
1058 1070
1071static unsigned long page_table_realloc_pmd(struct mmu_gather *tlb,
1072 struct mm_struct *mm, pud_t *pud,
1073 unsigned long addr, unsigned long end)
1074{
1075 unsigned long next, *table, *new;
1076 struct page *page;
1077 pmd_t *pmd;
1078
1079 pmd = pmd_offset(pud, addr);
1080 do {
1081 next = pmd_addr_end(addr, end);
1082again:
1083 if (pmd_none_or_clear_bad(pmd))
1084 continue;
1085 table = (unsigned long *) pmd_deref(*pmd);
1086 page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
1087 if (page_table_with_pgste(page))
1088 continue;
1089 /* Allocate new page table with pgstes */
1090 new = page_table_alloc_pgste(mm, addr);
1091 if (!new) {
1092 mm->context.has_pgste = 0;
1093 continue;
1094 }
1095 spin_lock(&mm->page_table_lock);
1096 if (likely((unsigned long *) pmd_deref(*pmd) == table)) {
1097 /* Nuke pmd entry pointing to the "short" page table */
1098 pmdp_flush_lazy(mm, addr, pmd);
1099 pmd_clear(pmd);
1100 /* Copy ptes from old table to new table */
1101 memcpy(new, table, PAGE_SIZE/2);
1102 clear_table(table, _PAGE_INVALID, PAGE_SIZE/2);
1103 /* Establish new table */
1104 pmd_populate(mm, pmd, (pte_t *) new);
1105 /* Free old table with rcu, there might be a walker! */
1106 page_table_free_rcu(tlb, table);
1107 new = NULL;
1108 }
1109 spin_unlock(&mm->page_table_lock);
1110 if (new) {
1111 page_table_free_pgste(new);
1112 goto again;
1113 }
1114 } while (pmd++, addr = next, addr != end);
1115
1116 return addr;
1117}
1118
1119static unsigned long page_table_realloc_pud(struct mmu_gather *tlb,
1120 struct mm_struct *mm, pgd_t *pgd,
1121 unsigned long addr, unsigned long end)
1122{
1123 unsigned long next;
1124 pud_t *pud;
1125
1126 pud = pud_offset(pgd, addr);
1127 do {
1128 next = pud_addr_end(addr, end);
1129 if (pud_none_or_clear_bad(pud))
1130 continue;
1131 next = page_table_realloc_pmd(tlb, mm, pud, addr, next);
1132 } while (pud++, addr = next, addr != end);
1133
1134 return addr;
1135}
1136
1137static void page_table_realloc(struct mmu_gather *tlb, struct mm_struct *mm,
1138 unsigned long addr, unsigned long end)
1139{
1140 unsigned long next;
1141 pgd_t *pgd;
1142
1143 pgd = pgd_offset(mm, addr);
1144 do {
1145 next = pgd_addr_end(addr, end);
1146 if (pgd_none_or_clear_bad(pgd))
1147 continue;
1148 next = page_table_realloc_pud(tlb, mm, pgd, addr, next);
1149 } while (pgd++, addr = next, addr != end);
1150}
1151
1059/* 1152/*
1060 * switch on pgstes for its userspace process (for kvm) 1153 * switch on pgstes for its userspace process (for kvm)
1061 */ 1154 */
1062int s390_enable_sie(void) 1155int s390_enable_sie(void)
1063{ 1156{
1064 struct task_struct *tsk = current; 1157 struct task_struct *tsk = current;
1065 struct mm_struct *mm, *old_mm; 1158 struct mm_struct *mm = tsk->mm;
1159 struct mmu_gather tlb;
1066 1160
1067 /* Do we have switched amode? If no, we cannot do sie */ 1161 /* Do we have switched amode? If no, we cannot do sie */
1068 if (s390_user_mode == HOME_SPACE_MODE) 1162 if (s390_user_mode == HOME_SPACE_MODE)
@@ -1072,57 +1166,16 @@ int s390_enable_sie(void)
1072 if (mm_has_pgste(tsk->mm)) 1166 if (mm_has_pgste(tsk->mm))
1073 return 0; 1167 return 0;
1074 1168
1075 /* lets check if we are allowed to replace the mm */ 1169 down_write(&mm->mmap_sem);
1076 task_lock(tsk);
1077 if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
1078#ifdef CONFIG_AIO
1079 !hlist_empty(&tsk->mm->ioctx_list) ||
1080#endif
1081 tsk->mm != tsk->active_mm) {
1082 task_unlock(tsk);
1083 return -EINVAL;
1084 }
1085 task_unlock(tsk);
1086
1087 /* we copy the mm and let dup_mm create the page tables with_pgstes */
1088 tsk->mm->context.alloc_pgste = 1;
1089 /* make sure that both mms have a correct rss state */
1090 sync_mm_rss(tsk->mm);
1091 mm = dup_mm(tsk);
1092 tsk->mm->context.alloc_pgste = 0;
1093 if (!mm)
1094 return -ENOMEM;
1095
1096#ifdef CONFIG_TRANSPARENT_HUGEPAGE
1097 /* split thp mappings and disable thp for future mappings */ 1170 /* split thp mappings and disable thp for future mappings */
1098 thp_split_mm(mm); 1171 thp_split_mm(mm);
1099 mm->def_flags |= VM_NOHUGEPAGE; 1172 /* Reallocate the page tables with pgstes */
1100#endif 1173 mm->context.has_pgste = 1;
1101 1174 tlb_gather_mmu(&tlb, mm, 0);
1102 /* Now lets check again if something happened */ 1175 page_table_realloc(&tlb, mm, 0, TASK_SIZE);
1103 task_lock(tsk); 1176 tlb_finish_mmu(&tlb, 0, -1);
1104 if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 || 1177 up_write(&mm->mmap_sem);
1105#ifdef CONFIG_AIO 1178 return mm->context.has_pgste ? 0 : -ENOMEM;
1106 !hlist_empty(&tsk->mm->ioctx_list) ||
1107#endif
1108 tsk->mm != tsk->active_mm) {
1109 mmput(mm);
1110 task_unlock(tsk);
1111 return -EINVAL;
1112 }
1113
1114 /* ok, we are alone. No ptrace, no threads, etc. */
1115 old_mm = tsk->mm;
1116 tsk->mm = tsk->active_mm = mm;
1117 preempt_disable();
1118 update_mm(mm, tsk);
1119 atomic_inc(&mm->context.attach_count);
1120 atomic_dec(&old_mm->context.attach_count);
1121 cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
1122 preempt_enable();
1123 task_unlock(tsk);
1124 mmput(old_mm);
1125 return 0;
1126} 1179}
1127EXPORT_SYMBOL_GPL(s390_enable_sie); 1180EXPORT_SYMBOL_GPL(s390_enable_sie);
1128 1181