Diffstat (limited to 'arch/s390/mm/pgtable.c')
-rw-r--r--   arch/s390/mm/pgtable.c   266
 1 file changed, 158 insertions(+), 108 deletions(-)
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index a8154a1a2c94..bf7c0dc64a76 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -161,7 +161,7 @@ static int gmap_unlink_segment(struct gmap *gmap, unsigned long *table)
         struct gmap_rmap *rmap;
         struct page *page;
 
-        if (*table & _SEGMENT_ENTRY_INV)
+        if (*table & _SEGMENT_ENTRY_INVALID)
                 return 0;
         page = pfn_to_page(*table >> PAGE_SHIFT);
         mp = (struct gmap_pgtable *) page->index;
@@ -172,7 +172,7 @@ static int gmap_unlink_segment(struct gmap *gmap, unsigned long *table)
                 kfree(rmap);
                 break;
         }
-        *table = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO | mp->vmaddr;
+        *table = mp->vmaddr | _SEGMENT_ENTRY_INVALID | _SEGMENT_ENTRY_PROTECT;
         return 1;
 }
 
@@ -258,7 +258,7 @@ static int gmap_alloc_table(struct gmap *gmap,
                 return -ENOMEM;
         new = (unsigned long *) page_to_phys(page);
         crst_table_init(new, init);
-        if (*table & _REGION_ENTRY_INV) {
+        if (*table & _REGION_ENTRY_INVALID) {
                 list_add(&page->lru, &gmap->crst_list);
                 *table = (unsigned long) new | _REGION_ENTRY_LENGTH |
                         (*table & _REGION_ENTRY_TYPE_MASK);
@@ -292,22 +292,22 @@ int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len)
         for (off = 0; off < len; off += PMD_SIZE) {
                 /* Walk the guest addr space page table */
                 table = gmap->table + (((to + off) >> 53) & 0x7ff);
-                if (*table & _REGION_ENTRY_INV)
+                if (*table & _REGION_ENTRY_INVALID)
                         goto out;
                 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
                 table = table + (((to + off) >> 42) & 0x7ff);
-                if (*table & _REGION_ENTRY_INV)
+                if (*table & _REGION_ENTRY_INVALID)
                         goto out;
                 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
                 table = table + (((to + off) >> 31) & 0x7ff);
-                if (*table & _REGION_ENTRY_INV)
+                if (*table & _REGION_ENTRY_INVALID)
                         goto out;
                 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
                 table = table + (((to + off) >> 20) & 0x7ff);
 
                 /* Clear segment table entry in guest address space. */
                 flush |= gmap_unlink_segment(gmap, table);
-                *table = _SEGMENT_ENTRY_INV;
+                *table = _SEGMENT_ENTRY_INVALID;
         }
 out:
         spin_unlock(&gmap->mm->page_table_lock);
@@ -335,7 +335,7 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from,
 
         if ((from | to | len) & (PMD_SIZE - 1))
                 return -EINVAL;
-        if (len == 0 || from + len > PGDIR_SIZE ||
+        if (len == 0 || from + len > TASK_MAX_SIZE ||
             from + len < from || to + len < to)
                 return -EINVAL;
 
@@ -345,17 +345,17 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from,
         for (off = 0; off < len; off += PMD_SIZE) {
                 /* Walk the gmap address space page table */
                 table = gmap->table + (((to + off) >> 53) & 0x7ff);
-                if ((*table & _REGION_ENTRY_INV) &&
+                if ((*table & _REGION_ENTRY_INVALID) &&
                     gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY))
                         goto out_unmap;
                 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
                 table = table + (((to + off) >> 42) & 0x7ff);
-                if ((*table & _REGION_ENTRY_INV) &&
+                if ((*table & _REGION_ENTRY_INVALID) &&
                     gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY))
                         goto out_unmap;
                 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
                 table = table + (((to + off) >> 31) & 0x7ff);
-                if ((*table & _REGION_ENTRY_INV) &&
+                if ((*table & _REGION_ENTRY_INVALID) &&
                     gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY))
                         goto out_unmap;
                 table = (unsigned long *) (*table & _REGION_ENTRY_ORIGIN);
@@ -363,7 +363,8 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from,
 
                 /* Store 'from' address in an invalid segment table entry. */
                 flush |= gmap_unlink_segment(gmap, table);
-                *table = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO | (from + off);
+                *table = (from + off) | (_SEGMENT_ENTRY_INVALID |
+                                         _SEGMENT_ENTRY_PROTECT);
         }
         spin_unlock(&gmap->mm->page_table_lock);
         up_read(&gmap->mm->mmap_sem);
@@ -384,15 +385,15 @@ static unsigned long *gmap_table_walk(unsigned long address, struct gmap *gmap)
         unsigned long *table;
 
         table = gmap->table + ((address >> 53) & 0x7ff);
-        if (unlikely(*table & _REGION_ENTRY_INV))
+        if (unlikely(*table & _REGION_ENTRY_INVALID))
                 return ERR_PTR(-EFAULT);
         table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
         table = table + ((address >> 42) & 0x7ff);
-        if (unlikely(*table & _REGION_ENTRY_INV))
+        if (unlikely(*table & _REGION_ENTRY_INVALID))
                 return ERR_PTR(-EFAULT);
         table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
         table = table + ((address >> 31) & 0x7ff);
-        if (unlikely(*table & _REGION_ENTRY_INV))
+        if (unlikely(*table & _REGION_ENTRY_INVALID))
                 return ERR_PTR(-EFAULT);
         table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
         table = table + ((address >> 20) & 0x7ff);
@@ -422,11 +423,11 @@ unsigned long __gmap_translate(unsigned long address, struct gmap *gmap)
                 return PTR_ERR(segment_ptr);
         /* Convert the gmap address to an mm address. */
         segment = *segment_ptr;
-        if (!(segment & _SEGMENT_ENTRY_INV)) {
+        if (!(segment & _SEGMENT_ENTRY_INVALID)) {
                 page = pfn_to_page(segment >> PAGE_SHIFT);
                 mp = (struct gmap_pgtable *) page->index;
                 return mp->vmaddr | (address & ~PMD_MASK);
-        } else if (segment & _SEGMENT_ENTRY_RO) {
+        } else if (segment & _SEGMENT_ENTRY_PROTECT) {
                 vmaddr = segment & _SEGMENT_ENTRY_ORIGIN;
                 return vmaddr | (address & ~PMD_MASK);
         }
@@ -517,8 +518,8 @@ static void gmap_disconnect_pgtable(struct mm_struct *mm, unsigned long *table)
         page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
         mp = (struct gmap_pgtable *) page->index;
         list_for_each_entry_safe(rmap, next, &mp->mapper, list) {
-                *rmap->entry =
-                        _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO | mp->vmaddr;
+                *rmap->entry = mp->vmaddr | (_SEGMENT_ENTRY_INVALID |
+                                             _SEGMENT_ENTRY_PROTECT);
                 list_del(&rmap->list);
                 kfree(rmap);
                 flush = 1;
@@ -545,13 +546,13 @@ unsigned long __gmap_fault(unsigned long address, struct gmap *gmap)
         /* Convert the gmap address to an mm address. */
         while (1) {
                 segment = *segment_ptr;
-                if (!(segment & _SEGMENT_ENTRY_INV)) {
+                if (!(segment & _SEGMENT_ENTRY_INVALID)) {
                         /* Page table is present */
                         page = pfn_to_page(segment >> PAGE_SHIFT);
                         mp = (struct gmap_pgtable *) page->index;
                         return mp->vmaddr | (address & ~PMD_MASK);
                 }
-                if (!(segment & _SEGMENT_ENTRY_RO))
+                if (!(segment & _SEGMENT_ENTRY_PROTECT))
                         /* Nothing mapped in the gmap address space. */
                         break;
                 rc = gmap_connect_pgtable(address, segment, segment_ptr, gmap);
@@ -586,25 +587,25 @@ void gmap_discard(unsigned long from, unsigned long to, struct gmap *gmap)
         while (address < to) {
                 /* Walk the gmap address space page table */
                 table = gmap->table + ((address >> 53) & 0x7ff);
-                if (unlikely(*table & _REGION_ENTRY_INV)) {
+                if (unlikely(*table & _REGION_ENTRY_INVALID)) {
                         address = (address + PMD_SIZE) & PMD_MASK;
                         continue;
                 }
                 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
                 table = table + ((address >> 42) & 0x7ff);
-                if (unlikely(*table & _REGION_ENTRY_INV)) {
+                if (unlikely(*table & _REGION_ENTRY_INVALID)) {
                         address = (address + PMD_SIZE) & PMD_MASK;
                         continue;
                 }
                 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
                 table = table + ((address >> 31) & 0x7ff);
-                if (unlikely(*table & _REGION_ENTRY_INV)) {
+                if (unlikely(*table & _REGION_ENTRY_INVALID)) {
                         address = (address + PMD_SIZE) & PMD_MASK;
                         continue;
                 }
                 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
                 table = table + ((address >> 20) & 0x7ff);
-                if (unlikely(*table & _SEGMENT_ENTRY_INV)) {
+                if (unlikely(*table & _SEGMENT_ENTRY_INVALID)) {
                         address = (address + PMD_SIZE) & PMD_MASK;
                         continue;
                 }
@@ -687,7 +688,7 @@ int gmap_ipte_notify(struct gmap *gmap, unsigned long start, unsigned long len)
                         continue;
                 /* Set notification bit in the pgste of the pte */
                 entry = *ptep;
-                if ((pte_val(entry) & (_PAGE_INVALID | _PAGE_RO)) == 0) {
+                if ((pte_val(entry) & (_PAGE_INVALID | _PAGE_PROTECT)) == 0) {
                         pgste = pgste_get_lock(ptep);
                         pgste_val(pgste) |= PGSTE_IN_BIT;
                         pgste_set_unlock(ptep, pgste);
@@ -731,6 +732,11 @@ void gmap_do_ipte_notify(struct mm_struct *mm, unsigned long addr, pte_t *pte)
         spin_unlock(&gmap_notifier_lock);
 }
 
+static inline int page_table_with_pgste(struct page *page)
+{
+        return atomic_read(&page->_mapcount) == 0;
+}
+
 static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
                                                     unsigned long vmaddr)
 {
@@ -750,10 +756,11 @@ static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
         mp->vmaddr = vmaddr & PMD_MASK;
         INIT_LIST_HEAD(&mp->mapper);
         page->index = (unsigned long) mp;
-        atomic_set(&page->_mapcount, 3);
+        atomic_set(&page->_mapcount, 0);
         table = (unsigned long *) page_to_phys(page);
-        clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2);
-        clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2);
+        clear_table(table, _PAGE_INVALID, PAGE_SIZE/2);
+        clear_table(table + PTRS_PER_PTE, PGSTE_HR_BIT | PGSTE_HC_BIT,
                     PAGE_SIZE/2);
         return table;
 }
 
@@ -791,26 +798,21 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
         pgste_val(new) |= (key & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48;
         pgste_val(new) |= (key & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
         if (!(pte_val(*ptep) & _PAGE_INVALID)) {
-                unsigned long address, bits;
-                unsigned char skey;
+                unsigned long address, bits, skey;
 
                 address = pte_val(*ptep) & PAGE_MASK;
-                skey = page_get_storage_key(address);
+                skey = (unsigned long) page_get_storage_key(address);
                 bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
+                skey = key & (_PAGE_ACC_BITS | _PAGE_FP_BIT);
                 /* Set storage key ACC and FP */
-                page_set_storage_key(address,
-                                     (key & (_PAGE_ACC_BITS | _PAGE_FP_BIT)),
-                                     !nq);
-
+                page_set_storage_key(address, skey, !nq);
                 /* Merge host changed & referenced into pgste */
                 pgste_val(new) |= bits << 52;
-                /* Transfer skey changed & referenced bit to kvm user bits */
-                pgste_val(new) |= bits << 45; /* PGSTE_UR_BIT & PGSTE_UC_BIT */
         }
         /* changing the guest storage key is considered a change of the page */
         if ((pgste_val(new) ^ pgste_val(old)) &
             (PGSTE_ACC_BITS | PGSTE_FP_BIT | PGSTE_GR_BIT | PGSTE_GC_BIT))
-                pgste_val(new) |= PGSTE_UC_BIT;
+                pgste_val(new) |= PGSTE_HC_BIT;
 
         pgste_set_unlock(ptep, new);
         pte_unmap_unlock(*ptep, ptl);
@@ -821,6 +823,11 @@ EXPORT_SYMBOL(set_guest_storage_key);
 
 #else /* CONFIG_PGSTE */
 
+static inline int page_table_with_pgste(struct page *page)
+{
+        return 0;
+}
+
 static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
                                                     unsigned long vmaddr)
 {
@@ -878,7 +885,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm, unsigned long vmaddr)
                 pgtable_page_ctor(page);
                 atomic_set(&page->_mapcount, 1);
                 table = (unsigned long *) page_to_phys(page);
-                clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
+                clear_table(table, _PAGE_INVALID, PAGE_SIZE);
                 spin_lock_bh(&mm->context.list_lock);
                 list_add(&page->lru, &mm->context.pgtable_list);
         } else {
@@ -897,12 +904,12 @@ void page_table_free(struct mm_struct *mm, unsigned long *table)
         struct page *page;
         unsigned int bit, mask;
 
-        if (mm_has_pgste(mm)) {
+        page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
+        if (page_table_with_pgste(page)) {
                 gmap_disconnect_pgtable(mm, table);
                 return page_table_free_pgste(table);
         }
         /* Free 1K/2K page table fragment of a 4K page */
-        page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
         bit = 1 << ((__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t)));
         spin_lock_bh(&mm->context.list_lock);
         if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
@@ -940,14 +947,14 @@ void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table)
         unsigned int bit, mask;
 
         mm = tlb->mm;
-        if (mm_has_pgste(mm)) {
+        page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
+        if (page_table_with_pgste(page)) {
                 gmap_disconnect_pgtable(mm, table);
                 table = (unsigned long *) (__pa(table) | FRAG_MASK);
                 tlb_remove_table(tlb, table);
                 return;
         }
         bit = 1 << ((__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t)));
-        page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
         spin_lock_bh(&mm->context.list_lock);
         if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
                 list_del(&page->lru);
@@ -1007,7 +1014,6 @@ void tlb_table_flush(struct mmu_gather *tlb)
         struct mmu_table_batch **batch = &tlb->batch;
 
         if (*batch) {
-                __tlb_flush_mm(tlb->mm);
                 call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu);
                 *batch = NULL;
         }
@@ -1017,11 +1023,12 @@ void tlb_remove_table(struct mmu_gather *tlb, void *table)
 {
         struct mmu_table_batch **batch = &tlb->batch;
 
+        tlb->mm->context.flush_mm = 1;
         if (*batch == NULL) {
                 *batch = (struct mmu_table_batch *)
                         __get_free_page(GFP_NOWAIT | __GFP_NOWARN);
                 if (*batch == NULL) {
-                        __tlb_flush_mm(tlb->mm);
+                        __tlb_flush_mm_lazy(tlb->mm);
                         tlb_remove_table_one(table);
                         return;
                 }
@@ -1029,40 +1036,124 @@ void tlb_remove_table(struct mmu_gather *tlb, void *table)
         }
         (*batch)->tables[(*batch)->nr++] = table;
         if ((*batch)->nr == MAX_TABLE_BATCH)
-                tlb_table_flush(tlb);
+                tlb_flush_mmu(tlb);
 }
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
-void thp_split_vma(struct vm_area_struct *vma)
+static inline void thp_split_vma(struct vm_area_struct *vma)
 {
         unsigned long addr;
-        struct page *page;
 
-        for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE) {
-                page = follow_page(vma, addr, FOLL_SPLIT);
-        }
+        for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE)
+                follow_page(vma, addr, FOLL_SPLIT);
 }
 
-void thp_split_mm(struct mm_struct *mm)
+static inline void thp_split_mm(struct mm_struct *mm)
 {
-        struct vm_area_struct *vma = mm->mmap;
+        struct vm_area_struct *vma;
 
-        while (vma != NULL) {
+        for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) {
                 thp_split_vma(vma);
                 vma->vm_flags &= ~VM_HUGEPAGE;
                 vma->vm_flags |= VM_NOHUGEPAGE;
-                vma = vma->vm_next;
         }
+        mm->def_flags |= VM_NOHUGEPAGE;
+}
+#else
+static inline void thp_split_mm(struct mm_struct *mm)
+{
 }
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
+static unsigned long page_table_realloc_pmd(struct mmu_gather *tlb,
+                                            struct mm_struct *mm, pud_t *pud,
+                                            unsigned long addr, unsigned long end)
+{
+        unsigned long next, *table, *new;
+        struct page *page;
+        pmd_t *pmd;
+
+        pmd = pmd_offset(pud, addr);
+        do {
+                next = pmd_addr_end(addr, end);
+again:
+                if (pmd_none_or_clear_bad(pmd))
+                        continue;
+                table = (unsigned long *) pmd_deref(*pmd);
+                page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
+                if (page_table_with_pgste(page))
+                        continue;
+                /* Allocate new page table with pgstes */
+                new = page_table_alloc_pgste(mm, addr);
+                if (!new) {
+                        mm->context.has_pgste = 0;
+                        continue;
+                }
+                spin_lock(&mm->page_table_lock);
+                if (likely((unsigned long *) pmd_deref(*pmd) == table)) {
+                        /* Nuke pmd entry pointing to the "short" page table */
+                        pmdp_flush_lazy(mm, addr, pmd);
+                        pmd_clear(pmd);
+                        /* Copy ptes from old table to new table */
+                        memcpy(new, table, PAGE_SIZE/2);
+                        clear_table(table, _PAGE_INVALID, PAGE_SIZE/2);
+                        /* Establish new table */
+                        pmd_populate(mm, pmd, (pte_t *) new);
+                        /* Free old table with rcu, there might be a walker! */
+                        page_table_free_rcu(tlb, table);
+                        new = NULL;
+                }
+                spin_unlock(&mm->page_table_lock);
+                if (new) {
+                        page_table_free_pgste(new);
+                        goto again;
+                }
+        } while (pmd++, addr = next, addr != end);
+
+        return addr;
+}
+
+static unsigned long page_table_realloc_pud(struct mmu_gather *tlb,
+                                            struct mm_struct *mm, pgd_t *pgd,
+                                            unsigned long addr, unsigned long end)
+{
+        unsigned long next;
+        pud_t *pud;
+
+        pud = pud_offset(pgd, addr);
+        do {
+                next = pud_addr_end(addr, end);
+                if (pud_none_or_clear_bad(pud))
+                        continue;
+                next = page_table_realloc_pmd(tlb, mm, pud, addr, next);
+        } while (pud++, addr = next, addr != end);
+
+        return addr;
+}
+
+static void page_table_realloc(struct mmu_gather *tlb, struct mm_struct *mm,
+                               unsigned long addr, unsigned long end)
+{
+        unsigned long next;
+        pgd_t *pgd;
+
+        pgd = pgd_offset(mm, addr);
+        do {
+                next = pgd_addr_end(addr, end);
+                if (pgd_none_or_clear_bad(pgd))
+                        continue;
+                next = page_table_realloc_pud(tlb, mm, pgd, addr, next);
+        } while (pgd++, addr = next, addr != end);
+}
+
 /*
  * switch on pgstes for its userspace process (for kvm)
  */
 int s390_enable_sie(void)
 {
         struct task_struct *tsk = current;
-        struct mm_struct *mm, *old_mm;
+        struct mm_struct *mm = tsk->mm;
+        struct mmu_gather tlb;
 
         /* Do we have switched amode? If no, we cannot do sie */
         if (s390_user_mode == HOME_SPACE_MODE)
@@ -1072,57 +1163,16 @@ int s390_enable_sie(void)
         if (mm_has_pgste(tsk->mm))
                 return 0;
 
-        /* lets check if we are allowed to replace the mm */
-        task_lock(tsk);
-        if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
-#ifdef CONFIG_AIO
-            !hlist_empty(&tsk->mm->ioctx_list) ||
-#endif
-            tsk->mm != tsk->active_mm) {
-                task_unlock(tsk);
-                return -EINVAL;
-        }
-        task_unlock(tsk);
-
-        /* we copy the mm and let dup_mm create the page tables with_pgstes */
-        tsk->mm->context.alloc_pgste = 1;
-        /* make sure that both mms have a correct rss state */
-        sync_mm_rss(tsk->mm);
-        mm = dup_mm(tsk);
-        tsk->mm->context.alloc_pgste = 0;
-        if (!mm)
-                return -ENOMEM;
-
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+        down_write(&mm->mmap_sem);
         /* split thp mappings and disable thp for future mappings */
         thp_split_mm(mm);
-        mm->def_flags |= VM_NOHUGEPAGE;
-#endif
-
-        /* Now lets check again if something happened */
-        task_lock(tsk);
-        if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
-#ifdef CONFIG_AIO
-            !hlist_empty(&tsk->mm->ioctx_list) ||
-#endif
-            tsk->mm != tsk->active_mm) {
-                mmput(mm);
-                task_unlock(tsk);
-                return -EINVAL;
-        }
-
-        /* ok, we are alone. No ptrace, no threads, etc. */
-        old_mm = tsk->mm;
-        tsk->mm = tsk->active_mm = mm;
-        preempt_disable();
-        update_mm(mm, tsk);
-        atomic_inc(&mm->context.attach_count);
-        atomic_dec(&old_mm->context.attach_count);
-        cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
-        preempt_enable();
-        task_unlock(tsk);
-        mmput(old_mm);
-        return 0;
+        /* Reallocate the page tables with pgstes */
+        mm->context.has_pgste = 1;
+        tlb_gather_mmu(&tlb, mm, 0, TASK_SIZE);
+        page_table_realloc(&tlb, mm, 0, TASK_SIZE);
+        tlb_finish_mmu(&tlb, 0, TASK_SIZE);
+        up_write(&mm->mmap_sem);
+        return mm->context.has_pgste ? 0 : -ENOMEM;
 }
 EXPORT_SYMBOL_GPL(s390_enable_sie);
 
@@ -1198,9 +1248,9 @@ pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
                 list_del(lh);
         }
         ptep = (pte_t *) pgtable;
-        pte_val(*ptep) = _PAGE_TYPE_EMPTY;
+        pte_val(*ptep) = _PAGE_INVALID;
         ptep++;
-        pte_val(*ptep) = _PAGE_TYPE_EMPTY;
+        pte_val(*ptep) = _PAGE_INVALID;
         return pgtable;
 }
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
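
The walk loops in gmap_unmap_segment(), gmap_map_segment(), gmap_table_walk() and gmap_discard() above all repeat the same index arithmetic: 11 index bits per table level (each "& 0x7ff" selects one of 2048 eight-byte entries, i.e. a 16 KB region or segment table) and a 1 MB segment granule (PMD_SIZE). Below is a minimal, illustrative C sketch of that address decomposition, not kernel code; the variable names and the sample address are ours, not kernel symbols.

#include <stdio.h>

int main(void)
{
        /* hypothetical guest address, for illustration only */
        unsigned long addr = 0x0000123456789abcUL;

        unsigned long rfx = (addr >> 53) & 0x7ff;       /* region-first table index  */
        unsigned long rsx = (addr >> 42) & 0x7ff;       /* region-second table index */
        unsigned long rtx = (addr >> 31) & 0x7ff;       /* region-third table index  */
        unsigned long sx  = (addr >> 20) & 0x7ff;       /* segment table index       */
        unsigned long off = addr & ((1UL << 20) - 1);   /* offset within 1 MB segment */

        printf("rfx=%lu rsx=%lu rtx=%lu sx=%lu off=0x%lx\n",
               rfx, rsx, rtx, sx, off);
        return 0;
}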