Diffstat (limited to 'arch/s390/mm/pgtable.c')
 arch/s390/mm/pgtable.c | 266 ++++++++++++++++++++++++++++++++-----------------
 1 file changed, 158 insertions(+), 108 deletions(-)
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index a8154a1a2c94..bf7c0dc64a76 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -161,7 +161,7 @@ static int gmap_unlink_segment(struct gmap *gmap, unsigned long *table)
         struct gmap_rmap *rmap;
         struct page *page;
 
-        if (*table & _SEGMENT_ENTRY_INV)
+        if (*table & _SEGMENT_ENTRY_INVALID)
                 return 0;
         page = pfn_to_page(*table >> PAGE_SHIFT);
         mp = (struct gmap_pgtable *) page->index;
@@ -172,7 +172,7 @@ static int gmap_unlink_segment(struct gmap *gmap, unsigned long *table)
                 kfree(rmap);
                 break;
         }
-        *table = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO | mp->vmaddr;
+        *table = mp->vmaddr | _SEGMENT_ENTRY_INVALID | _SEGMENT_ENTRY_PROTECT;
         return 1;
 }
 
@@ -258,7 +258,7 @@ static int gmap_alloc_table(struct gmap *gmap,
                 return -ENOMEM;
         new = (unsigned long *) page_to_phys(page);
         crst_table_init(new, init);
-        if (*table & _REGION_ENTRY_INV) {
+        if (*table & _REGION_ENTRY_INVALID) {
                 list_add(&page->lru, &gmap->crst_list);
                 *table = (unsigned long) new | _REGION_ENTRY_LENGTH |
                         (*table & _REGION_ENTRY_TYPE_MASK);
@@ -292,22 +292,22 @@ int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len)
         for (off = 0; off < len; off += PMD_SIZE) {
                 /* Walk the guest addr space page table */
                 table = gmap->table + (((to + off) >> 53) & 0x7ff);
-                if (*table & _REGION_ENTRY_INV)
+                if (*table & _REGION_ENTRY_INVALID)
                         goto out;
                 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
                 table = table + (((to + off) >> 42) & 0x7ff);
-                if (*table & _REGION_ENTRY_INV)
+                if (*table & _REGION_ENTRY_INVALID)
                         goto out;
                 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
                 table = table + (((to + off) >> 31) & 0x7ff);
-                if (*table & _REGION_ENTRY_INV)
+                if (*table & _REGION_ENTRY_INVALID)
                         goto out;
                 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
                 table = table + (((to + off) >> 20) & 0x7ff);
 
                 /* Clear segment table entry in guest address space. */
                 flush |= gmap_unlink_segment(gmap, table);
-                *table = _SEGMENT_ENTRY_INV;
+                *table = _SEGMENT_ENTRY_INVALID;
         }
 out:
         spin_unlock(&gmap->mm->page_table_lock);
@@ -335,7 +335,7 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from,
 
         if ((from | to | len) & (PMD_SIZE - 1))
                 return -EINVAL;
-        if (len == 0 || from + len > PGDIR_SIZE ||
+        if (len == 0 || from + len > TASK_MAX_SIZE ||
             from + len < from || to + len < to)
                 return -EINVAL;
 
@@ -345,17 +345,17 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from,
         for (off = 0; off < len; off += PMD_SIZE) {
                 /* Walk the gmap address space page table */
                 table = gmap->table + (((to + off) >> 53) & 0x7ff);
-                if ((*table & _REGION_ENTRY_INV) &&
+                if ((*table & _REGION_ENTRY_INVALID) &&
                     gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY))
                         goto out_unmap;
                 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
                 table = table + (((to + off) >> 42) & 0x7ff);
-                if ((*table & _REGION_ENTRY_INV) &&
+                if ((*table & _REGION_ENTRY_INVALID) &&
                     gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY))
                         goto out_unmap;
                 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
                 table = table + (((to + off) >> 31) & 0x7ff);
-                if ((*table & _REGION_ENTRY_INV) &&
+                if ((*table & _REGION_ENTRY_INVALID) &&
                     gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY))
                         goto out_unmap;
                 table = (unsigned long *) (*table & _REGION_ENTRY_ORIGIN);
@@ -363,7 +363,8 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from,
 
                 /* Store 'from' address in an invalid segment table entry. */
                 flush |= gmap_unlink_segment(gmap, table);
-                *table = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO | (from + off);
+                *table = (from + off) | (_SEGMENT_ENTRY_INVALID |
+                                         _SEGMENT_ENTRY_PROTECT);
         }
         spin_unlock(&gmap->mm->page_table_lock);
         up_read(&gmap->mm->mmap_sem);
@@ -384,15 +385,15 @@ static unsigned long *gmap_table_walk(unsigned long address, struct gmap *gmap)
         unsigned long *table;
 
         table = gmap->table + ((address >> 53) & 0x7ff);
-        if (unlikely(*table & _REGION_ENTRY_INV))
+        if (unlikely(*table & _REGION_ENTRY_INVALID))
                 return ERR_PTR(-EFAULT);
         table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
         table = table + ((address >> 42) & 0x7ff);
-        if (unlikely(*table & _REGION_ENTRY_INV))
+        if (unlikely(*table & _REGION_ENTRY_INVALID))
                 return ERR_PTR(-EFAULT);
         table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
         table = table + ((address >> 31) & 0x7ff);
-        if (unlikely(*table & _REGION_ENTRY_INV))
+        if (unlikely(*table & _REGION_ENTRY_INVALID))
                 return ERR_PTR(-EFAULT);
         table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
         table = table + ((address >> 20) & 0x7ff);
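
All of the walks above decompose the guest address the same way: each level of the s390 region/segment hierarchy is indexed by 11 bits of the 64-bit address, so every table has 2048 entries, hence the 0x7ff mask. A minimal sketch of the index helpers these open-coded shifts correspond to (the helper names are illustrative, not from the patch):

/* Region-first, region-second, region-third and segment table
 * indices of a 64-bit address, 11 bits per level. */
static inline unsigned long rfx(unsigned long addr) { return (addr >> 53) & 0x7ff; }
static inline unsigned long rsx(unsigned long addr) { return (addr >> 42) & 0x7ff; }
static inline unsigned long rtx(unsigned long addr) { return (addr >> 31) & 0x7ff; }
static inline unsigned long sx(unsigned long addr)  { return (addr >> 20) & 0x7ff; }
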
@@ -422,11 +423,11 @@ unsigned long __gmap_translate(unsigned long address, struct gmap *gmap)
                 return PTR_ERR(segment_ptr);
         /* Convert the gmap address to an mm address. */
         segment = *segment_ptr;
-        if (!(segment & _SEGMENT_ENTRY_INV)) {
+        if (!(segment & _SEGMENT_ENTRY_INVALID)) {
                 page = pfn_to_page(segment >> PAGE_SHIFT);
                 mp = (struct gmap_pgtable *) page->index;
                 return mp->vmaddr | (address & ~PMD_MASK);
-        } else if (segment & _SEGMENT_ENTRY_RO) {
+        } else if (segment & _SEGMENT_ENTRY_PROTECT) {
                 vmaddr = segment & _SEGMENT_ENTRY_ORIGIN;
                 return vmaddr | (address & ~PMD_MASK);
         }
@@ -517,8 +518,8 @@ static void gmap_disconnect_pgtable(struct mm_struct *mm, unsigned long *table)
         page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
         mp = (struct gmap_pgtable *) page->index;
         list_for_each_entry_safe(rmap, next, &mp->mapper, list) {
-                *rmap->entry =
-                        _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO | mp->vmaddr;
+                *rmap->entry = mp->vmaddr | (_SEGMENT_ENTRY_INVALID |
+                                             _SEGMENT_ENTRY_PROTECT);
                 list_del(&rmap->list);
                 kfree(rmap);
                 flush = 1;
@@ -545,13 +546,13 @@ unsigned long __gmap_fault(unsigned long address, struct gmap *gmap)
         /* Convert the gmap address to an mm address. */
         while (1) {
                 segment = *segment_ptr;
-                if (!(segment & _SEGMENT_ENTRY_INV)) {
+                if (!(segment & _SEGMENT_ENTRY_INVALID)) {
                         /* Page table is present */
                         page = pfn_to_page(segment >> PAGE_SHIFT);
                         mp = (struct gmap_pgtable *) page->index;
                         return mp->vmaddr | (address & ~PMD_MASK);
                 }
-                if (!(segment & _SEGMENT_ENTRY_RO))
+                if (!(segment & _SEGMENT_ENTRY_PROTECT))
                         /* Nothing mapped in the gmap address space. */
                         break;
                 rc = gmap_connect_pgtable(address, segment, segment_ptr, gmap);
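
The rename from _SEGMENT_ENTRY_RO to _SEGMENT_ENTRY_PROTECT does not change the three states a gmap segment entry can be in, which is what this fault loop distinguishes. A summary derived from the checks above:

/*
 * Gmap segment table entry states (sketch):
 *
 *   INVALID=0            page table connected; the entry holds the page
 *                        table origin, and the host vmaddr is read from
 *                        the gmap_pgtable behind it.
 *   INVALID=1 PROTECT=1  mapped but not yet connected; the entry holds
 *                        the host vmaddr stored by gmap_map_segment(),
 *                        which gmap_connect_pgtable() resolves on fault.
 *   INVALID=1 PROTECT=0  nothing mapped at this gmap address.
 */
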
@@ -586,25 +587,25 @@ void gmap_discard(unsigned long from, unsigned long to, struct gmap *gmap)
         while (address < to) {
                 /* Walk the gmap address space page table */
                 table = gmap->table + ((address >> 53) & 0x7ff);
-                if (unlikely(*table & _REGION_ENTRY_INV)) {
+                if (unlikely(*table & _REGION_ENTRY_INVALID)) {
                         address = (address + PMD_SIZE) & PMD_MASK;
                         continue;
                 }
                 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
                 table = table + ((address >> 42) & 0x7ff);
-                if (unlikely(*table & _REGION_ENTRY_INV)) {
+                if (unlikely(*table & _REGION_ENTRY_INVALID)) {
                         address = (address + PMD_SIZE) & PMD_MASK;
                         continue;
                 }
                 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
                 table = table + ((address >> 31) & 0x7ff);
-                if (unlikely(*table & _REGION_ENTRY_INV)) {
+                if (unlikely(*table & _REGION_ENTRY_INVALID)) {
                         address = (address + PMD_SIZE) & PMD_MASK;
                         continue;
                 }
                 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
                 table = table + ((address >> 20) & 0x7ff);
-                if (unlikely(*table & _SEGMENT_ENTRY_INV)) {
+                if (unlikely(*table & _SEGMENT_ENTRY_INVALID)) {
                         address = (address + PMD_SIZE) & PMD_MASK;
                         continue;
                 }
@@ -687,7 +688,7 @@ int gmap_ipte_notify(struct gmap *gmap, unsigned long start, unsigned long len)
                         continue;
                 /* Set notification bit in the pgste of the pte */
                 entry = *ptep;
-                if ((pte_val(entry) & (_PAGE_INVALID | _PAGE_RO)) == 0) {
+                if ((pte_val(entry) & (_PAGE_INVALID | _PAGE_PROTECT)) == 0) {
                         pgste = pgste_get_lock(ptep);
                         pgste_val(pgste) |= PGSTE_IN_BIT;
                         pgste_set_unlock(ptep, pgste);
@@ -731,6 +732,11 @@ void gmap_do_ipte_notify(struct mm_struct *mm, unsigned long addr, pte_t *pte)
         spin_unlock(&gmap_notifier_lock);
 }
 
+static inline int page_table_with_pgste(struct page *page)
+{
+        return atomic_read(&page->_mapcount) == 0;
+}
+
 static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
                                                     unsigned long vmaddr)
 {
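
The new helper keys off an allocation-side invariant instead of mm_has_pgste(): page_table_alloc_pgste() now sets page->_mapcount to 0 (see the hunk below), while the 2K-fragment allocator page_table_alloc() starts _mapcount at 1 and keeps FRAG_MASK bits set while fragments are live. A sketch of the invariant:

/*
 * _mapcount encoding for s390 page-table pages (per this patch):
 *   page_table_alloc_pgste():  atomic_set(&page->_mapcount, 0)
 *   page_table_alloc():        atomic_set(&page->_mapcount, 1),
 *                              fragment bits keep it nonzero
 * so _mapcount == 0 identifies a full 4K page table with pgstes.
 */
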
@@ -750,10 +756,11 @@ static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
         mp->vmaddr = vmaddr & PMD_MASK;
         INIT_LIST_HEAD(&mp->mapper);
         page->index = (unsigned long) mp;
-        atomic_set(&page->_mapcount, 3);
+        atomic_set(&page->_mapcount, 0);
         table = (unsigned long *) page_to_phys(page);
-        clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2);
-        clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2);
+        clear_table(table, _PAGE_INVALID, PAGE_SIZE/2);
+        clear_table(table + PTRS_PER_PTE, PGSTE_HR_BIT | PGSTE_HC_BIT,
+                    PAGE_SIZE/2);
         return table;
 }
 
@@ -791,26 +798,21 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
         pgste_val(new) |= (key & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48;
         pgste_val(new) |= (key & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
         if (!(pte_val(*ptep) & _PAGE_INVALID)) {
-                unsigned long address, bits;
-                unsigned char skey;
+                unsigned long address, bits, skey;
 
                 address = pte_val(*ptep) & PAGE_MASK;
-                skey = page_get_storage_key(address);
+                skey = (unsigned long) page_get_storage_key(address);
                 bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
+                skey = key & (_PAGE_ACC_BITS | _PAGE_FP_BIT);
                 /* Set storage key ACC and FP */
-                page_set_storage_key(address,
-                                     (key & (_PAGE_ACC_BITS | _PAGE_FP_BIT)),
-                                     !nq);
-
+                page_set_storage_key(address, skey, !nq);
                 /* Merge host changed & referenced into pgste */
                 pgste_val(new) |= bits << 52;
-                /* Transfer skey changed & referenced bit to kvm user bits */
-                pgste_val(new) |= bits << 45; /* PGSTE_UR_BIT & PGSTE_UC_BIT */
         }
         /* changing the guest storage key is considered a change of the page */
         if ((pgste_val(new) ^ pgste_val(old)) &
             (PGSTE_ACC_BITS | PGSTE_FP_BIT | PGSTE_GR_BIT | PGSTE_GC_BIT))
-                pgste_val(new) |= PGSTE_UC_BIT;
+                pgste_val(new) |= PGSTE_HC_BIT;
 
         pgste_set_unlock(ptep, new);
         pte_unmap_unlock(*ptep, ptl);
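
The rework collapses the page_set_storage_key() call into one statement and drops the transfer of skey R/C into the KVM user bits. The shift constants pack pieces of the architected storage key into the pgste; roughly (a sketch of the bookkeeping, assuming the _PAGE_*/PGSTE_* definitions in asm/pgtable.h):

/*
 * Storage key byte: ACC (4 bits) | FP | R | C | 0
 *   (key & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48   guest R/C copy
 *                                                      (PGSTE_GR/GC)
 *   (key & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56      ACC/FP copy
 *   (host R/C read back via page_get_storage_key())
 *                                              << 52   host R/C copy
 *                                                      (PGSTE_HR/HC)
 */
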
@@ -821,6 +823,11 @@ EXPORT_SYMBOL(set_guest_storage_key);
 
 #else /* CONFIG_PGSTE */
 
+static inline int page_table_with_pgste(struct page *page)
+{
+        return 0;
+}
+
 static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
                                                     unsigned long vmaddr)
 {
@@ -878,7 +885,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm, unsigned long vmaddr)
                 pgtable_page_ctor(page);
                 atomic_set(&page->_mapcount, 1);
                 table = (unsigned long *) page_to_phys(page);
-                clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
+                clear_table(table, _PAGE_INVALID, PAGE_SIZE);
                 spin_lock_bh(&mm->context.list_lock);
                 list_add(&page->lru, &mm->context.pgtable_list);
         } else {
@@ -897,12 +904,12 @@ void page_table_free(struct mm_struct *mm, unsigned long *table)
         struct page *page;
         unsigned int bit, mask;
 
-        if (mm_has_pgste(mm)) {
+        page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
+        if (page_table_with_pgste(page)) {
                 gmap_disconnect_pgtable(mm, table);
                 return page_table_free_pgste(table);
         }
         /* Free 1K/2K page table fragment of a 4K page */
-        page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
         bit = 1 << ((__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t)));
         spin_lock_bh(&mm->context.list_lock);
         if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
@@ -940,14 +947,14 @@ void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table)
         unsigned int bit, mask;
 
         mm = tlb->mm;
-        if (mm_has_pgste(mm)) {
+        page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
+        if (page_table_with_pgste(page)) {
                 gmap_disconnect_pgtable(mm, table);
                 table = (unsigned long *) (__pa(table) | FRAG_MASK);
                 tlb_remove_table(tlb, table);
                 return;
         }
         bit = 1 << ((__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t)));
-        page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
         spin_lock_bh(&mm->context.list_lock);
         if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
                 list_del(&page->lru);
@@ -1007,7 +1014,6 @@ void tlb_table_flush(struct mmu_gather *tlb)
         struct mmu_table_batch **batch = &tlb->batch;
 
         if (*batch) {
-                __tlb_flush_mm(tlb->mm);
                 call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu);
                 *batch = NULL;
         }
@@ -1017,11 +1023,12 @@ void tlb_remove_table(struct mmu_gather *tlb, void *table)
 {
         struct mmu_table_batch **batch = &tlb->batch;
 
+        tlb->mm->context.flush_mm = 1;
         if (*batch == NULL) {
                 *batch = (struct mmu_table_batch *)
                         __get_free_page(GFP_NOWAIT | __GFP_NOWARN);
                 if (*batch == NULL) {
-                        __tlb_flush_mm(tlb->mm);
+                        __tlb_flush_mm_lazy(tlb->mm);
                         tlb_remove_table_one(table);
                         return;
                 }
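
Table batching now marks the mm as needing a flush up front and flushes lazily: tlb_table_flush() no longer flushes unconditionally, and the allocation-failure path uses __tlb_flush_mm_lazy(), which only issues the flush when the mark is set. A sketch of the assumed helper shape (the real definition lives in asm/tlbflush.h):

static inline void __tlb_flush_mm_lazy(struct mm_struct *mm)
{
        if (mm->context.flush_mm) {
                __tlb_flush_mm(mm);
                mm->context.flush_mm = 0;
        }
}
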
@@ -1029,40 +1036,124 @@ void tlb_remove_table(struct mmu_gather *tlb, void *table)
         }
         (*batch)->tables[(*batch)->nr++] = table;
         if ((*batch)->nr == MAX_TABLE_BATCH)
-                tlb_table_flush(tlb);
+                tlb_flush_mmu(tlb);
 }
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
-void thp_split_vma(struct vm_area_struct *vma)
+static inline void thp_split_vma(struct vm_area_struct *vma)
 {
         unsigned long addr;
-        struct page *page;
 
-        for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE) {
-                page = follow_page(vma, addr, FOLL_SPLIT);
-        }
+        for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE)
+                follow_page(vma, addr, FOLL_SPLIT);
 }
 
-void thp_split_mm(struct mm_struct *mm)
+static inline void thp_split_mm(struct mm_struct *mm)
 {
-        struct vm_area_struct *vma = mm->mmap;
+        struct vm_area_struct *vma;
 
-        while (vma != NULL) {
+        for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) {
                 thp_split_vma(vma);
                 vma->vm_flags &= ~VM_HUGEPAGE;
                 vma->vm_flags |= VM_NOHUGEPAGE;
-                vma = vma->vm_next;
         }
+        mm->def_flags |= VM_NOHUGEPAGE;
+}
+#else
+static inline void thp_split_mm(struct mm_struct *mm)
+{
 }
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
+static unsigned long page_table_realloc_pmd(struct mmu_gather *tlb,
+                                struct mm_struct *mm, pud_t *pud,
+                                unsigned long addr, unsigned long end)
+{
+        unsigned long next, *table, *new;
+        struct page *page;
+        pmd_t *pmd;
+
+        pmd = pmd_offset(pud, addr);
+        do {
+                next = pmd_addr_end(addr, end);
+again:
+                if (pmd_none_or_clear_bad(pmd))
+                        continue;
+                table = (unsigned long *) pmd_deref(*pmd);
+                page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
+                if (page_table_with_pgste(page))
+                        continue;
+                /* Allocate new page table with pgstes */
+                new = page_table_alloc_pgste(mm, addr);
+                if (!new) {
+                        mm->context.has_pgste = 0;
+                        continue;
+                }
+                spin_lock(&mm->page_table_lock);
+                if (likely((unsigned long *) pmd_deref(*pmd) == table)) {
+                        /* Nuke pmd entry pointing to the "short" page table */
+                        pmdp_flush_lazy(mm, addr, pmd);
+                        pmd_clear(pmd);
+                        /* Copy ptes from old table to new table */
+                        memcpy(new, table, PAGE_SIZE/2);
+                        clear_table(table, _PAGE_INVALID, PAGE_SIZE/2);
+                        /* Establish new table */
+                        pmd_populate(mm, pmd, (pte_t *) new);
+                        /* Free old table with rcu, there might be a walker! */
+                        page_table_free_rcu(tlb, table);
+                        new = NULL;
+                }
+                spin_unlock(&mm->page_table_lock);
+                if (new) {
+                        page_table_free_pgste(new);
+                        goto again;
+                }
+        } while (pmd++, addr = next, addr != end);
+
+        return addr;
+}
+
+static unsigned long page_table_realloc_pud(struct mmu_gather *tlb,
+                                struct mm_struct *mm, pgd_t *pgd,
+                                unsigned long addr, unsigned long end)
+{
+        unsigned long next;
+        pud_t *pud;
+
+        pud = pud_offset(pgd, addr);
+        do {
+                next = pud_addr_end(addr, end);
+                if (pud_none_or_clear_bad(pud))
+                        continue;
+                next = page_table_realloc_pmd(tlb, mm, pud, addr, next);
+        } while (pud++, addr = next, addr != end);
+
+        return addr;
+}
+
+static void page_table_realloc(struct mmu_gather *tlb, struct mm_struct *mm,
+                               unsigned long addr, unsigned long end)
+{
+        unsigned long next;
+        pgd_t *pgd;
+
+        pgd = pgd_offset(mm, addr);
+        do {
+                next = pgd_addr_end(addr, end);
+                if (pgd_none_or_clear_bad(pgd))
+                        continue;
+                next = page_table_realloc_pud(tlb, mm, pgd, addr, next);
+        } while (pgd++, addr = next, addr != end);
+}
+
 /*
  * switch on pgstes for its userspace process (for kvm)
  */
 int s390_enable_sie(void)
 {
         struct task_struct *tsk = current;
-        struct mm_struct *mm, *old_mm;
+        struct mm_struct *mm = tsk->mm;
+        struct mmu_gather tlb;
 
         /* Do we have switched amode? If no, we cannot do sie */
         if (s390_user_mode == HOME_SPACE_MODE)
@@ -1072,57 +1163,16 @@ int s390_enable_sie(void)
         if (mm_has_pgste(tsk->mm))
                 return 0;
 
-        /* lets check if we are allowed to replace the mm */
-        task_lock(tsk);
-        if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
-#ifdef CONFIG_AIO
-            !hlist_empty(&tsk->mm->ioctx_list) ||
-#endif
-            tsk->mm != tsk->active_mm) {
-                task_unlock(tsk);
-                return -EINVAL;
-        }
-        task_unlock(tsk);
-
-        /* we copy the mm and let dup_mm create the page tables with_pgstes */
-        tsk->mm->context.alloc_pgste = 1;
-        /* make sure that both mms have a correct rss state */
-        sync_mm_rss(tsk->mm);
-        mm = dup_mm(tsk);
-        tsk->mm->context.alloc_pgste = 0;
-        if (!mm)
-                return -ENOMEM;
-
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+        down_write(&mm->mmap_sem);
         /* split thp mappings and disable thp for future mappings */
         thp_split_mm(mm);
-        mm->def_flags |= VM_NOHUGEPAGE;
-#endif
-
-        /* Now lets check again if something happened */
-        task_lock(tsk);
-        if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
-#ifdef CONFIG_AIO
-            !hlist_empty(&tsk->mm->ioctx_list) ||
-#endif
-            tsk->mm != tsk->active_mm) {
-                mmput(mm);
-                task_unlock(tsk);
-                return -EINVAL;
-        }
-
-        /* ok, we are alone. No ptrace, no threads, etc. */
-        old_mm = tsk->mm;
-        tsk->mm = tsk->active_mm = mm;
-        preempt_disable();
-        update_mm(mm, tsk);
-        atomic_inc(&mm->context.attach_count);
-        atomic_dec(&old_mm->context.attach_count);
-        cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
-        preempt_enable();
-        task_unlock(tsk);
-        mmput(old_mm);
-        return 0;
+        /* Reallocate the page tables with pgstes */
+        mm->context.has_pgste = 1;
+        tlb_gather_mmu(&tlb, mm, 0, TASK_SIZE);
+        page_table_realloc(&tlb, mm, 0, TASK_SIZE);
+        tlb_finish_mmu(&tlb, 0, TASK_SIZE);
+        up_write(&mm->mmap_sem);
+        return mm->context.has_pgste ? 0 : -ENOMEM;
 }
 EXPORT_SYMBOL_GPL(s390_enable_sie);
 
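
With the dup_mm() trick gone, s390_enable_sie() no longer requires a single-threaded caller: the existing page tables are reallocated in place under mmap_sem, and a failed pgste allocation surfaces as -ENOMEM via the cleared has_pgste flag. A hedged caller sketch (KVM is the expected user, e.g. from kvm_arch_init_vm(); illustrative only):

        rc = s390_enable_sie();
        if (rc)
                goto out_err;   /* -EINVAL without switched amode, -ENOMEM
                                   if a pgste table allocation failed */
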
@@ -1198,9 +1248,9 @@ pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
                 list_del(lh);
         }
         ptep = (pte_t *) pgtable;
-        pte_val(*ptep) = _PAGE_TYPE_EMPTY;
+        pte_val(*ptep) = _PAGE_INVALID;
         ptep++;
-        pte_val(*ptep) = _PAGE_TYPE_EMPTY;
+        pte_val(*ptep) = _PAGE_INVALID;
         return pgtable;
 }
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */