Diffstat (limited to 'arch/s390/include/asm/pgtable.h')
-rw-r--r--  arch/s390/include/asm/pgtable.h  607
1 file changed, 312 insertions(+), 295 deletions(-)
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 02ace3491c51..c4773a2ef3d3 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -31,9 +31,8 @@
 #ifndef __ASSEMBLY__
 #include <linux/sched.h>
 #include <linux/mm_types.h>
-#include <asm/bitops.h>
 #include <asm/bug.h>
-#include <asm/processor.h>
+#include <asm/page.h>
 
 extern pgd_t swapper_pg_dir[] __attribute__ ((aligned (4096)));
 extern void paging_init(void);
@@ -243,11 +242,13 @@ extern unsigned long VMALLOC_START;
 /* Software bits in the page table entry */
 #define _PAGE_SWT	0x001		/* SW pte type bit t */
 #define _PAGE_SWX	0x002		/* SW pte type bit x */
-#define _PAGE_SPECIAL	0x004		/* SW associated with special page */
+#define _PAGE_SWC	0x004		/* SW pte changed bit (for KVM) */
+#define _PAGE_SWR	0x008		/* SW pte referenced bit (for KVM) */
+#define _PAGE_SPECIAL	0x010		/* SW associated with special page */
 #define __HAVE_ARCH_PTE_SPECIAL
 
 /* Set of bits not changed in pte_modify */
-#define _PAGE_CHG_MASK	(PAGE_MASK | _PAGE_SPECIAL)
+#define _PAGE_CHG_MASK	(PAGE_MASK | _PAGE_SPECIAL | _PAGE_SWC | _PAGE_SWR)
 
 /* Six different types of pages. */
 #define _PAGE_TYPE_EMPTY	0x400
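
The two new software bits are only useful if they survive protection changes, which is why they are folded into _PAGE_CHG_MASK. A minimal user-space sketch of that invariant (pte_modify's keep-and-replace behaviour is assumed from the rest of this header; the bit values are the ones defined in the hunk above):

#include <assert.h>

#define SK_PAGE_MASK	(~0xfffUL)	/* 4K pages, stand-in for PAGE_MASK */
#define _PAGE_SWC	0x004
#define _PAGE_SWR	0x008
#define _PAGE_SPECIAL	0x010
#define _PAGE_CHG_MASK	(SK_PAGE_MASK | _PAGE_SPECIAL | _PAGE_SWC | _PAGE_SWR)

int main(void)
{
	/* pte_modify keeps (pte & _PAGE_CHG_MASK) and replaces the rest,
	 * so a pte marked changed/referenced in software stays marked
	 * when its protection type is rewritten. */
	unsigned long pte = 0x12345000UL | _PAGE_SWC | _PAGE_SWR;
	unsigned long newprot = 0x200;	/* e.g. _PAGE_TYPE_RO */
	unsigned long out = (pte & _PAGE_CHG_MASK) | newprot;

	assert(out & _PAGE_SWC);
	assert(out & _PAGE_SWR);
	return 0;
}
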
@@ -256,8 +257,6 @@ extern unsigned long VMALLOC_START;
 #define _PAGE_TYPE_FILE		0x601	/* bit 0x002 is used for offset !! */
 #define _PAGE_TYPE_RO		0x200
 #define _PAGE_TYPE_RW		0x000
-#define _PAGE_TYPE_EX_RO	0x202
-#define _PAGE_TYPE_EX_RW	0x002
 
 /*
  * Only four types for huge pages, using the invalid bit and protection bit
@@ -287,8 +286,6 @@ extern unsigned long VMALLOC_START;
  * _PAGE_TYPE_FILE	11?1 -> 11?1
  * _PAGE_TYPE_RO	0100 -> 1100
  * _PAGE_TYPE_RW	0000 -> 1000
- * _PAGE_TYPE_EX_RO	0110 -> 1110
- * _PAGE_TYPE_EX_RW	0010 -> 1010
  *
  * pte_none is true for bits combinations 1000, 1010, 1100, 1110
  * pte_present is true for bits combinations 0000, 0010, 0100, 0110, 1001
@@ -297,14 +294,17 @@ extern unsigned long VMALLOC_START;
  */
 
 /* Page status table bits for virtualization */
-#define RCP_PCL_BIT	55
-#define RCP_HR_BIT	54
-#define RCP_HC_BIT	53
-#define RCP_GR_BIT	50
-#define RCP_GC_BIT	49
-
-/* User dirty bit for KVM's migration feature */
-#define KVM_UD_BIT	47
+#define RCP_ACC_BITS	0xf000000000000000UL
+#define RCP_FP_BIT	0x0800000000000000UL
+#define RCP_PCL_BIT	0x0080000000000000UL
+#define RCP_HR_BIT	0x0040000000000000UL
+#define RCP_HC_BIT	0x0020000000000000UL
+#define RCP_GR_BIT	0x0004000000000000UL
+#define RCP_GC_BIT	0x0002000000000000UL
+
+/* User dirty / referenced bit for KVM's migration feature */
+#define KVM_UR_BIT	0x0000800000000000UL
+#define KVM_UC_BIT	0x0000400000000000UL
 
 #ifndef __s390x__
 
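
These defines switch from bit numbers (fed to the bitops helpers in the old code) to explicit masks (used with plain and/or and the compare-and-swap lock further down). A quick illustrative check that each new mask is exactly 1UL shifted by the old bit number, so no bit actually moved:

#include <assert.h>

int main(void)
{
	assert(1UL << 55 == 0x0080000000000000UL);	/* RCP_PCL_BIT */
	assert(1UL << 54 == 0x0040000000000000UL);	/* RCP_HR_BIT */
	assert(1UL << 53 == 0x0020000000000000UL);	/* RCP_HC_BIT */
	assert(1UL << 50 == 0x0004000000000000UL);	/* RCP_GR_BIT */
	assert(1UL << 49 == 0x0002000000000000UL);	/* RCP_GC_BIT */
	assert(1UL << 47 == 0x0000800000000000UL);	/* KVM_UD_BIT -> KVM_UR_BIT */
	return 0;
}
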
@@ -377,85 +377,54 @@ extern unsigned long VMALLOC_START;
 #define _ASCE_USER_BITS		(_ASCE_SPACE_SWITCH | _ASCE_PRIVATE_SPACE | \
 				 _ASCE_ALT_EVENT)
 
-/* Bits int the storage key */
-#define _PAGE_CHANGED	0x02		/* HW changed bit */
-#define _PAGE_REFERENCED 0x04		/* HW referenced bit */
-
 /*
  * Page protection definitions.
  */
 #define PAGE_NONE	__pgprot(_PAGE_TYPE_NONE)
 #define PAGE_RO		__pgprot(_PAGE_TYPE_RO)
 #define PAGE_RW		__pgprot(_PAGE_TYPE_RW)
-#define PAGE_EX_RO	__pgprot(_PAGE_TYPE_EX_RO)
-#define PAGE_EX_RW	__pgprot(_PAGE_TYPE_EX_RW)
 
 #define PAGE_KERNEL	PAGE_RW
 #define PAGE_COPY	PAGE_RO
 
 /*
- * Dependent on the EXEC_PROTECT option s390 can do execute protection.
- * Write permission always implies read permission. In theory with a
- * primary/secondary page table execute only can be implemented but
- * it would cost an additional bit in the pte to distinguish all the
- * different pte types. To avoid that execute permission currently
- * implies read permission as well.
+ * On s390 the page table entry has an invalid bit and a read-only bit.
+ * Read permission implies execute permission and write permission
+ * implies read permission.
  */
          /*xwr*/
 #define __P000	PAGE_NONE
 #define __P001	PAGE_RO
 #define __P010	PAGE_RO
 #define __P011	PAGE_RO
-#define __P100	PAGE_EX_RO
-#define __P101	PAGE_EX_RO
-#define __P110	PAGE_EX_RO
-#define __P111	PAGE_EX_RO
+#define __P100	PAGE_RO
+#define __P101	PAGE_RO
+#define __P110	PAGE_RO
+#define __P111	PAGE_RO
 
 #define __S000	PAGE_NONE
 #define __S001	PAGE_RO
 #define __S010	PAGE_RW
 #define __S011	PAGE_RW
-#define __S100	PAGE_EX_RO
-#define __S101	PAGE_EX_RO
-#define __S110	PAGE_EX_RW
-#define __S111	PAGE_EX_RW
-
-#ifndef __s390x__
-# define PxD_SHADOW_SHIFT	1
-#else /* __s390x__ */
-# define PxD_SHADOW_SHIFT	2
-#endif /* __s390x__ */
+#define __S100	PAGE_RO
+#define __S101	PAGE_RO
+#define __S110	PAGE_RW
+#define __S111	PAGE_RW
 
-static inline void *get_shadow_table(void *table)
+static inline int mm_exclusive(struct mm_struct *mm)
 {
-	unsigned long addr, offset;
-	struct page *page;
-
-	addr = (unsigned long) table;
-	offset = addr & ((PAGE_SIZE << PxD_SHADOW_SHIFT) - 1);
-	page = virt_to_page((void *)(addr ^ offset));
-	return (void *)(addr_t)(page->index ? (page->index | offset) : 0UL);
+	return likely(mm == current->active_mm &&
+		      atomic_read(&mm->context.attach_count) <= 1);
 }
 
-/*
- * Certain architectures need to do special things when PTEs
- * within a page table are directly modified. Thus, the following
- * hook is made available.
- */
-static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
-			      pte_t *ptep, pte_t entry)
+static inline int mm_has_pgste(struct mm_struct *mm)
 {
-	*ptep = entry;
-	if (mm->context.noexec) {
-		if (!(pte_val(entry) & _PAGE_INVALID) &&
-		    (pte_val(entry) & _PAGE_SWX))
-			pte_val(entry) |= _PAGE_RO;
-		else
-			pte_val(entry) = _PAGE_TYPE_EMPTY;
-		ptep[PTRS_PER_PTE] = entry;
-	}
+#ifdef CONFIG_PGSTE
+	if (unlikely(mm->context.has_pgste))
+		return 1;
+#endif
+	return 0;
 }
-
 /*
  * pgd/pmd/pte query functions
  */
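
mm_exclusive() is what later lets the clear/invalidate paths skip the hardware flush: context.attach_count counts CPUs with this mm attached, so "mm == current->active_mm && attach_count <= 1" means no other CPU can hold a TLB entry for it. A minimal sketch of the decision the functions further down make (can_skip_ipte is a hypothetical name, not part of the patch):

/* Illustrative only: ptep_get_and_clear below does
 * "if (!mm_exclusive(mm)) __ptep_ipte(address, ptep);" and otherwise
 * just clears the pte in memory, deferring the flush via
 * mm->context.flush_mm. */
static inline int can_skip_ipte(struct mm_struct *mm)
{
	return mm_exclusive(mm);
}
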
@@ -568,52 +537,127 @@ static inline int pte_special(pte_t pte)
 }
 
 #define __HAVE_ARCH_PTE_SAME
-#define pte_same(a,b)	(pte_val(a) == pte_val(b))
+static inline int pte_same(pte_t a, pte_t b)
+{
+	return pte_val(a) == pte_val(b);
+}
 
-static inline void rcp_lock(pte_t *ptep)
+static inline pgste_t pgste_get_lock(pte_t *ptep)
 {
+	unsigned long new = 0;
 #ifdef CONFIG_PGSTE
-	unsigned long *pgste = (unsigned long *) (ptep + PTRS_PER_PTE);
+	unsigned long old;
+
 	preempt_disable();
-	while (test_and_set_bit(RCP_PCL_BIT, pgste))
-		;
+	asm(
+		" lg %0,%2\n"
+		"0: lgr %1,%0\n"
+		" nihh %0,0xff7f\n"	/* clear RCP_PCL_BIT in old */
+		" oihh %1,0x0080\n"	/* set RCP_PCL_BIT in new */
+		" csg %0,%1,%2\n"
+		" jl 0b\n"
+		: "=&d" (old), "=&d" (new), "=Q" (ptep[PTRS_PER_PTE])
+		: "Q" (ptep[PTRS_PER_PTE]) : "cc");
 #endif
+	return __pgste(new);
 }
 
-static inline void rcp_unlock(pte_t *ptep)
+static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste)
 {
 #ifdef CONFIG_PGSTE
-	unsigned long *pgste = (unsigned long *) (ptep + PTRS_PER_PTE);
-	clear_bit(RCP_PCL_BIT, pgste);
+	asm(
+		" nihh %1,0xff7f\n"	/* clear RCP_PCL_BIT */
+		" stg %1,%0\n"
+		: "=Q" (ptep[PTRS_PER_PTE])
+		: "d" (pgste_val(pgste)), "Q" (ptep[PTRS_PER_PTE]) : "cc");
 	preempt_enable();
 #endif
 }
 
-/* forward declaration for SetPageUptodate in page-flags.h*/
-static inline void page_clear_dirty(struct page *page, int mapped);
-#include <linux/page-flags.h>
-
-static inline void ptep_rcp_copy(pte_t *ptep)
+static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste)
 {
 #ifdef CONFIG_PGSTE
-	struct page *page = virt_to_page(pte_val(*ptep));
-	unsigned int skey;
-	unsigned long *pgste = (unsigned long *) (ptep + PTRS_PER_PTE);
-
-	skey = page_get_storage_key(page_to_phys(page));
-	if (skey & _PAGE_CHANGED) {
-		set_bit_simple(RCP_GC_BIT, pgste);
-		set_bit_simple(KVM_UD_BIT, pgste);
+	unsigned long pfn, bits;
+	unsigned char skey;
+
+	pfn = pte_val(*ptep) >> PAGE_SHIFT;
+	skey = page_get_storage_key(pfn);
+	bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
+	/* Clear page changed & referenced bit in the storage key */
+	if (bits) {
+		skey ^= bits;
+		page_set_storage_key(pfn, skey, 1);
 	}
-	if (skey & _PAGE_REFERENCED)
-		set_bit_simple(RCP_GR_BIT, pgste);
-	if (test_and_clear_bit_simple(RCP_HC_BIT, pgste)) {
-		SetPageDirty(page);
-		set_bit_simple(KVM_UD_BIT, pgste);
-	}
-	if (test_and_clear_bit_simple(RCP_HR_BIT, pgste))
-		SetPageReferenced(page);
+	/* Transfer page changed & referenced bit to guest bits in pgste */
+	pgste_val(pgste) |= bits << 48;		/* RCP_GR_BIT & RCP_GC_BIT */
+	/* Get host changed & referenced bits from pgste */
+	bits |= (pgste_val(pgste) & (RCP_HR_BIT | RCP_HC_BIT)) >> 52;
+	/* Clear host bits in pgste. */
+	pgste_val(pgste) &= ~(RCP_HR_BIT | RCP_HC_BIT);
+	pgste_val(pgste) &= ~(RCP_ACC_BITS | RCP_FP_BIT);
+	/* Copy page access key and fetch protection bit to pgste */
+	pgste_val(pgste) |=
+		(unsigned long) (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
+	/* Transfer changed and referenced to kvm user bits */
+	pgste_val(pgste) |= bits << 45;		/* KVM_UR_BIT & KVM_UC_BIT */
+	/* Transfer changed & referenced to pte sofware bits */
+	pte_val(*ptep) |= bits << 1;		/* _PAGE_SWR & _PAGE_SWC */
 #endif
+	return pgste;
+
+}
+
+static inline pgste_t pgste_update_young(pte_t *ptep, pgste_t pgste)
+{
+#ifdef CONFIG_PGSTE
+	int young;
+
+	young = page_reset_referenced(pte_val(*ptep) & PAGE_MASK);
+	/* Transfer page referenced bit to pte software bit (host view) */
+	if (young || (pgste_val(pgste) & RCP_HR_BIT))
+		pte_val(*ptep) |= _PAGE_SWR;
+	/* Clear host referenced bit in pgste. */
+	pgste_val(pgste) &= ~RCP_HR_BIT;
+	/* Transfer page referenced bit to guest bit in pgste */
+	pgste_val(pgste) |= (unsigned long) young << 50; /* set RCP_GR_BIT */
+#endif
+	return pgste;
+
+}
+
+static inline void pgste_set_pte(pte_t *ptep, pgste_t pgste)
+{
+#ifdef CONFIG_PGSTE
+	unsigned long pfn;
+	unsigned long okey, nkey;
+
+	pfn = pte_val(*ptep) >> PAGE_SHIFT;
+	okey = nkey = page_get_storage_key(pfn);
+	nkey &= ~(_PAGE_ACC_BITS | _PAGE_FP_BIT);
+	/* Set page access key and fetch protection bit from pgste */
+	nkey |= (pgste_val(pgste) & (RCP_ACC_BITS | RCP_FP_BIT)) >> 56;
+	if (okey != nkey)
+		page_set_storage_key(pfn, nkey, 1);
+#endif
+}
+
+/*
+ * Certain architectures need to do special things when PTEs
+ * within a page table are directly modified. Thus, the following
+ * hook is made available.
+ */
+static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
+			      pte_t *ptep, pte_t entry)
+{
+	pgste_t pgste;
+
+	if (mm_has_pgste(mm)) {
+		pgste = pgste_get_lock(ptep);
+		pgste_set_pte(ptep, pgste);
+		*ptep = entry;
+		pgste_set_unlock(ptep, pgste);
+	} else
+		*ptep = entry;
 }
 
 /*
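
pgste_get_lock() implements a spinlock on RCP_PCL_BIT of the page status table entry: the expected value always has the lock bit clear, the replacement has it set, and csg retries until the swap succeeds. The same loop spelled out with C11 atomics, as an illustration only (the kernel keeps the inline asm so the locked value stays available in a register for pgste_set_unlock):

#include <stdatomic.h>

#define RCP_PCL_BIT	0x0080000000000000UL

static unsigned long pgste_get_lock_sketch(_Atomic unsigned long *pgste)
{
	unsigned long old, new;

	old = atomic_load(pgste);
	do {
		old &= ~RCP_PCL_BIT;		/* expect the lock bit clear */
		new = old | RCP_PCL_BIT;	/* try to set it */
	} while (!atomic_compare_exchange_weak(pgste, &old, new));
	return new;				/* locked value, cf. __pgste(new) */
}

The unlock side is then a plain store of the (possibly modified) pgste value with RCP_PCL_BIT masked off, which is exactly what the nihh/stg pair in pgste_set_unlock() does.
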
@@ -627,19 +671,19 @@ static inline int pte_write(pte_t pte)
 
 static inline int pte_dirty(pte_t pte)
 {
-	/* A pte is neither clean nor dirty on s/390. The dirty bit
-	 * is in the storage key. See page_test_and_clear_dirty for
-	 * details.
-	 */
+#ifdef CONFIG_PGSTE
+	if (pte_val(pte) & _PAGE_SWC)
+		return 1;
+#endif
 	return 0;
 }
 
 static inline int pte_young(pte_t pte)
 {
-	/* A pte is neither young nor old on s/390. The young bit
-	 * is in the storage key. See page_test_and_clear_young for
-	 * details.
-	 */
+#ifdef CONFIG_PGSTE
+	if (pte_val(pte) & _PAGE_SWR)
+		return 1;
+#endif
 	return 0;
 }
 
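
pte_dirty()/pte_young() now just test the software bits that pgste_update_all() fills in from the storage key under the pgste lock. The shift constants in that function encode the bit-layout correspondences; an illustrative check that they all line up, using the values defined in the hunks above:

#include <assert.h>

#define _PAGE_CHANGED		0x02
#define _PAGE_REFERENCED	0x04
#define _PAGE_SWC		0x004
#define _PAGE_SWR		0x008
#define RCP_HR_BIT		0x0040000000000000UL
#define RCP_HC_BIT		0x0020000000000000UL
#define RCP_GR_BIT		0x0004000000000000UL
#define RCP_GC_BIT		0x0002000000000000UL
#define KVM_UR_BIT		0x0000800000000000UL
#define KVM_UC_BIT		0x0000400000000000UL

int main(void)
{
	unsigned long bits = _PAGE_CHANGED | _PAGE_REFERENCED;

	/* storage-key bits -> guest view in the pgste */
	assert(bits << 48 == (RCP_GC_BIT | RCP_GR_BIT));
	/* host view in the pgste -> storage-key bit positions */
	assert((RCP_HC_BIT | RCP_HR_BIT) >> 52 == bits);
	/* storage-key bits -> KVM user dirty/referenced bits */
	assert(bits << 45 == (KVM_UC_BIT | KVM_UR_BIT));
	/* storage-key bits -> pte software bits read by pte_dirty/pte_young */
	assert(bits << 1 == (_PAGE_SWC | _PAGE_SWR));
	return 0;
}
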
@@ -647,64 +691,30 @@ static inline int pte_young(pte_t pte)
  * pgd/pmd/pte modification functions
  */
 
-#ifndef __s390x__
-
-#define pgd_clear(pgd)		do { } while (0)
-#define pud_clear(pud)		do { } while (0)
-
-#else /* __s390x__ */
-
-static inline void pgd_clear_kernel(pgd_t * pgd)
+static inline void pgd_clear(pgd_t *pgd)
 {
+#ifdef __s390x__
 	if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2)
 		pgd_val(*pgd) = _REGION2_ENTRY_EMPTY;
+#endif
 }
 
-static inline void pgd_clear(pgd_t * pgd)
-{
-	pgd_t *shadow = get_shadow_table(pgd);
-
-	pgd_clear_kernel(pgd);
-	if (shadow)
-		pgd_clear_kernel(shadow);
-}
-
-static inline void pud_clear_kernel(pud_t *pud)
+static inline void pud_clear(pud_t *pud)
 {
+#ifdef __s390x__
 	if ((pud_val(*pud) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3)
 		pud_val(*pud) = _REGION3_ENTRY_EMPTY;
+#endif
 }
 
-static inline void pud_clear(pud_t *pud)
-{
-	pud_t *shadow = get_shadow_table(pud);
-
-	pud_clear_kernel(pud);
-	if (shadow)
-		pud_clear_kernel(shadow);
-}
-
-#endif /* __s390x__ */
-
-static inline void pmd_clear_kernel(pmd_t * pmdp)
+static inline void pmd_clear(pmd_t *pmdp)
 {
 	pmd_val(*pmdp) = _SEGMENT_ENTRY_EMPTY;
 }
 
-static inline void pmd_clear(pmd_t *pmd)
-{
-	pmd_t *shadow = get_shadow_table(pmd);
-
-	pmd_clear_kernel(pmd);
-	if (shadow)
-		pmd_clear_kernel(shadow);
-}
-
 static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 {
 	pte_val(*ptep) = _PAGE_TYPE_EMPTY;
-	if (mm->context.noexec)
-		pte_val(ptep[PTRS_PER_PTE]) = _PAGE_TYPE_EMPTY;
 }
 
 /*
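
With the shadow page tables gone, the clear helpers collapse to the type-checked writes alone. The type check matters because s390 folds unused upper levels: a pgd slot may really hold a lower-level entry, so each helper only writes the empty value when the entry is of the level it expects (region-2 for pgd, region-3 for pud). A hedged sketch of that common shape (pxd_clear_sketch is a hypothetical name):

/* Illustrative only, assuming the folding behaviour visible above. */
static inline void pxd_clear_sketch(unsigned long *entry,
				    unsigned long expected_type,
				    unsigned long empty)
{
	if ((*entry & _REGION_ENTRY_TYPE_MASK) == expected_type)
		*entry = empty;
}
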
@@ -734,35 +744,27 @@ static inline pte_t pte_mkwrite(pte_t pte)
 
 static inline pte_t pte_mkclean(pte_t pte)
 {
-	/* The only user of pte_mkclean is the fork() code.
-	   We must *not* clear the *physical* page dirty bit
-	   just because fork() wants to clear the dirty bit in
-	   *one* of the page's mappings.  So we just do nothing. */
+#ifdef CONFIG_PGSTE
+	pte_val(pte) &= ~_PAGE_SWC;
+#endif
 	return pte;
 }
 
 static inline pte_t pte_mkdirty(pte_t pte)
 {
-	/* We do not explicitly set the dirty bit because the
-	 * sske instruction is slow. It is faster to let the
-	 * next instruction set the dirty bit.
-	 */
 	return pte;
 }
 
 static inline pte_t pte_mkold(pte_t pte)
 {
-	/* S/390 doesn't keep its dirty/referenced bit in the pte.
-	 * There is no point in clearing the real referenced bit.
-	 */
+#ifdef CONFIG_PGSTE
+	pte_val(pte) &= ~_PAGE_SWR;
+#endif
 	return pte;
 }
 
 static inline pte_t pte_mkyoung(pte_t pte)
 {
-	/* S/390 doesn't keep its dirty/referenced bit in the pte.
-	 * There is no point in setting the real referenced bit.
-	 */
 	return pte;
 }
 
@@ -800,62 +802,60 @@ static inline pte_t pte_mkhuge(pte_t pte)
 }
 #endif
 
-#ifdef CONFIG_PGSTE
 /*
- * Get (and clear) the user dirty bit for a PTE.
+ * Get (and clear) the user dirty bit for a pte.
  */
-static inline int kvm_s390_test_and_clear_page_dirty(struct mm_struct *mm,
-						     pte_t *ptep)
+static inline int ptep_test_and_clear_user_dirty(struct mm_struct *mm,
+						 pte_t *ptep)
 {
-	int dirty;
-	unsigned long *pgste;
-	struct page *page;
-	unsigned int skey;
-
-	if (!mm->context.has_pgste)
-		return -EINVAL;
-	rcp_lock(ptep);
-	pgste = (unsigned long *) (ptep + PTRS_PER_PTE);
-	page = virt_to_page(pte_val(*ptep));
-	skey = page_get_storage_key(page_to_phys(page));
-	if (skey & _PAGE_CHANGED) {
-		set_bit_simple(RCP_GC_BIT, pgste);
-		set_bit_simple(KVM_UD_BIT, pgste);
+	pgste_t pgste;
+	int dirty = 0;
+
+	if (mm_has_pgste(mm)) {
+		pgste = pgste_get_lock(ptep);
+		pgste = pgste_update_all(ptep, pgste);
+		dirty = !!(pgste_val(pgste) & KVM_UC_BIT);
+		pgste_val(pgste) &= ~KVM_UC_BIT;
+		pgste_set_unlock(ptep, pgste);
+		return dirty;
 	}
-	if (test_and_clear_bit_simple(RCP_HC_BIT, pgste)) {
-		SetPageDirty(page);
-		set_bit_simple(KVM_UD_BIT, pgste);
-	}
-	dirty = test_and_clear_bit_simple(KVM_UD_BIT, pgste);
-	if (skey & _PAGE_CHANGED)
-		page_clear_dirty(page, 1);
-	rcp_unlock(ptep);
 	return dirty;
 }
-#endif
+
+/*
+ * Get (and clear) the user referenced bit for a pte.
+ */
+static inline int ptep_test_and_clear_user_young(struct mm_struct *mm,
+						 pte_t *ptep)
+{
+	pgste_t pgste;
+	int young = 0;
+
+	if (mm_has_pgste(mm)) {
+		pgste = pgste_get_lock(ptep);
+		pgste = pgste_update_young(ptep, pgste);
+		young = !!(pgste_val(pgste) & KVM_UR_BIT);
+		pgste_val(pgste) &= ~KVM_UR_BIT;
+		pgste_set_unlock(ptep, pgste);
+	}
+	return young;
+}
 
 #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
 static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
 					    unsigned long addr, pte_t *ptep)
 {
-#ifdef CONFIG_PGSTE
-	unsigned long physpage;
-	int young;
-	unsigned long *pgste;
+	pgste_t pgste;
+	pte_t pte;
 
-	if (!vma->vm_mm->context.has_pgste)
-		return 0;
-	physpage = pte_val(*ptep) & PAGE_MASK;
-	pgste = (unsigned long *) (ptep + PTRS_PER_PTE);
-
-	young = ((page_get_storage_key(physpage) & _PAGE_REFERENCED) != 0);
-	rcp_lock(ptep);
-	if (young)
-		set_bit_simple(RCP_GR_BIT, pgste);
-	young |= test_and_clear_bit_simple(RCP_HR_BIT, pgste);
-	rcp_unlock(ptep);
-	return young;
-#endif
+	if (mm_has_pgste(vma->vm_mm)) {
+		pgste = pgste_get_lock(ptep);
+		pgste = pgste_update_young(ptep, pgste);
+		pte = *ptep;
+		*ptep = pte_mkold(pte);
+		pgste_set_unlock(ptep, pgste);
+		return pte_young(pte);
+	}
 	return 0;
 }
 
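
ptep_test_and_clear_user_dirty() also loses its -EINVAL path: without a pgste it now simply reports "not dirty". A hypothetical sketch of how a migration dirty-log sweep could use it; the names and the flat pte array are illustrative, since the real KVM code resolves ptes through its memslot machinery:

/* Sketch only, assuming ptes[] backs the guest memory region. */
static void sweep_dirty_sketch(struct mm_struct *mm, pte_t **ptes,
			       unsigned long npages, unsigned long *bitmap)
{
	unsigned long i;

	for (i = 0; i < npages; i++)
		if (ptep_test_and_clear_user_dirty(mm, ptes[i]))
			bitmap[i / BITS_PER_LONG] |=	/* BITS_PER_LONG is 64 on s390x */
				1UL << (i % BITS_PER_LONG);
}
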
@@ -867,10 +867,7 @@ static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
 	 * On s390 reference bits are in storage key and never in TLB
 	 * With virtualization we handle the reference bit, without we
 	 * we can simply return */
-#ifdef CONFIG_PGSTE
 	return ptep_test_and_clear_young(vma, address, ptep);
-#endif
-	return 0;
 }
 
 static inline void __ptep_ipte(unsigned long address, pte_t *ptep)
@@ -890,25 +887,6 @@ static inline void __ptep_ipte(unsigned long address, pte_t *ptep)
 	}
 }
 
-static inline void ptep_invalidate(struct mm_struct *mm,
-				   unsigned long address, pte_t *ptep)
-{
-	if (mm->context.has_pgste) {
-		rcp_lock(ptep);
-		__ptep_ipte(address, ptep);
-		ptep_rcp_copy(ptep);
-		pte_val(*ptep) = _PAGE_TYPE_EMPTY;
-		rcp_unlock(ptep);
-		return;
-	}
-	__ptep_ipte(address, ptep);
-	pte_val(*ptep) = _PAGE_TYPE_EMPTY;
-	if (mm->context.noexec) {
-		__ptep_ipte(address, ptep + PTRS_PER_PTE);
-		pte_val(*(ptep + PTRS_PER_PTE)) = _PAGE_TYPE_EMPTY;
-	}
-}
-
 /*
  * This is hard to understand. ptep_get_and_clear and ptep_clear_flush
  * both clear the TLB for the unmapped pte. The reason is that
@@ -923,24 +901,72 @@ static inline void ptep_invalidate(struct mm_struct *mm,
  * is a nop.
  */
 #define __HAVE_ARCH_PTEP_GET_AND_CLEAR
-#define ptep_get_and_clear(__mm, __address, __ptep) \
-({ \
-	pte_t __pte = *(__ptep); \
-	(__mm)->context.flush_mm = 1; \
-	if (atomic_read(&(__mm)->context.attach_count) > 1 || \
-	    (__mm) != current->active_mm) \
-		ptep_invalidate(__mm, __address, __ptep); \
-	else \
-		pte_clear((__mm), (__address), (__ptep)); \
-	__pte; \
-})
+static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
+				       unsigned long address, pte_t *ptep)
+{
+	pgste_t pgste;
+	pte_t pte;
+
+	mm->context.flush_mm = 1;
+	if (mm_has_pgste(mm))
+		pgste = pgste_get_lock(ptep);
+
+	pte = *ptep;
+	if (!mm_exclusive(mm))
+		__ptep_ipte(address, ptep);
+	pte_val(*ptep) = _PAGE_TYPE_EMPTY;
+
+	if (mm_has_pgste(mm)) {
+		pgste = pgste_update_all(&pte, pgste);
+		pgste_set_unlock(ptep, pgste);
+	}
+	return pte;
+}
+
+#define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
+static inline pte_t ptep_modify_prot_start(struct mm_struct *mm,
+					   unsigned long address,
+					   pte_t *ptep)
+{
+	pte_t pte;
+
+	mm->context.flush_mm = 1;
+	if (mm_has_pgste(mm))
+		pgste_get_lock(ptep);
+
+	pte = *ptep;
+	if (!mm_exclusive(mm))
+		__ptep_ipte(address, ptep);
+	return pte;
+}
+
+static inline void ptep_modify_prot_commit(struct mm_struct *mm,
+					   unsigned long address,
+					   pte_t *ptep, pte_t pte)
+{
+	*ptep = pte;
+	if (mm_has_pgste(mm))
+		pgste_set_unlock(ptep, *(pgste_t *)(ptep + PTRS_PER_PTE));
+}
 
 #define __HAVE_ARCH_PTEP_CLEAR_FLUSH
 static inline pte_t ptep_clear_flush(struct vm_area_struct *vma,
 				     unsigned long address, pte_t *ptep)
 {
-	pte_t pte = *ptep;
-	ptep_invalidate(vma->vm_mm, address, ptep);
+	pgste_t pgste;
+	pte_t pte;
+
+	if (mm_has_pgste(vma->vm_mm))
+		pgste = pgste_get_lock(ptep);
+
+	pte = *ptep;
+	__ptep_ipte(address, ptep);
+	pte_val(*ptep) = _PAGE_TYPE_EMPTY;
+
+	if (mm_has_pgste(vma->vm_mm)) {
+		pgste = pgste_update_all(&pte, pgste);
+		pgste_set_unlock(ptep, pgste);
+	}
+	return pte;
+}
 
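
The new modify-prot transaction keeps the pgste lock held between start and commit: start invalidates and returns the old pte with the lock taken, commit installs the new pte and drops it. An illustrative caller, following the generic kernel pattern that __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION hooks into (change_prot_sketch is a hypothetical name; pte_modify comes from elsewhere in this header):

/* Sketch only: change one pte's protection atomically w.r.t. the pgste. */
static void change_prot_sketch(struct mm_struct *mm, unsigned long addr,
			       pte_t *ptep, pgprot_t newprot)
{
	pte_t pte;

	pte = ptep_modify_prot_start(mm, addr, ptep);	/* old pte, lock held */
	pte = pte_modify(pte, newprot);			/* keep _PAGE_CHG_MASK bits */
	ptep_modify_prot_commit(mm, addr, ptep, pte);	/* install, unlock */
}
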
@@ -953,76 +979,67 @@ static inline pte_t ptep_clear_flush(struct vm_area_struct *vma,
  */
 #define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
 static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
-					    unsigned long addr,
+					    unsigned long address,
 					    pte_t *ptep, int full)
 {
-	pte_t pte = *ptep;
+	pgste_t pgste;
+	pte_t pte;
+
+	if (mm_has_pgste(mm))
+		pgste = pgste_get_lock(ptep);
+
+	pte = *ptep;
+	if (!full)
+		__ptep_ipte(address, ptep);
+	pte_val(*ptep) = _PAGE_TYPE_EMPTY;
 
-	if (full)
-		pte_clear(mm, addr, ptep);
-	else
-		ptep_invalidate(mm, addr, ptep);
+	if (mm_has_pgste(mm)) {
+		pgste = pgste_update_all(&pte, pgste);
+		pgste_set_unlock(ptep, pgste);
+	}
 	return pte;
 }
 
 #define __HAVE_ARCH_PTEP_SET_WRPROTECT
-#define ptep_set_wrprotect(__mm, __addr, __ptep) \
-({ \
-	pte_t __pte = *(__ptep); \
-	if (pte_write(__pte)) { \
-		(__mm)->context.flush_mm = 1; \
-		if (atomic_read(&(__mm)->context.attach_count) > 1 || \
-		    (__mm) != current->active_mm) \
-			ptep_invalidate(__mm, __addr, __ptep); \
-		set_pte_at(__mm, __addr, __ptep, pte_wrprotect(__pte)); \
-	} \
-})
+static inline pte_t ptep_set_wrprotect(struct mm_struct *mm,
+				       unsigned long address, pte_t *ptep)
+{
+	pgste_t pgste;
+	pte_t pte = *ptep;
 
-#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
-#define ptep_set_access_flags(__vma, __addr, __ptep, __entry, __dirty) \
-({ \
-	int __changed = !pte_same(*(__ptep), __entry); \
-	if (__changed) { \
-		ptep_invalidate((__vma)->vm_mm, __addr, __ptep); \
-		set_pte_at((__vma)->vm_mm, __addr, __ptep, __entry); \
-	} \
-	__changed; \
-})
+	if (pte_write(pte)) {
+		mm->context.flush_mm = 1;
+		if (mm_has_pgste(mm))
+			pgste = pgste_get_lock(ptep);
 
-/*
- * Test and clear dirty bit in storage key.
- * We can't clear the changed bit atomically. This is a potential
- * race against modification of the referenced bit. This function
- * should therefore only be called if it is not mapped in any
- * address space.
- */
-#define __HAVE_ARCH_PAGE_TEST_DIRTY
-static inline int page_test_dirty(struct page *page)
-{
-	return (page_get_storage_key(page_to_phys(page)) & _PAGE_CHANGED) != 0;
-}
+		if (!mm_exclusive(mm))
+			__ptep_ipte(address, ptep);
+		*ptep = pte_wrprotect(pte);
 
-#define __HAVE_ARCH_PAGE_CLEAR_DIRTY
-static inline void page_clear_dirty(struct page *page, int mapped)
-{
-	page_set_storage_key(page_to_phys(page), PAGE_DEFAULT_KEY, mapped);
+		if (mm_has_pgste(mm))
+			pgste_set_unlock(ptep, pgste);
+	}
+	return pte;
 }
 
-/*
- * Test and clear referenced bit in storage key.
- */
-#define __HAVE_ARCH_PAGE_TEST_AND_CLEAR_YOUNG
-static inline int page_test_and_clear_young(struct page *page)
+#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
+static inline int ptep_set_access_flags(struct vm_area_struct *vma,
+					unsigned long address, pte_t *ptep,
+					pte_t entry, int dirty)
 {
-	unsigned long physpage = page_to_phys(page);
-	int ccode;
-
-	asm volatile(
-		" rrbe 0,%1\n"
-		" ipm %0\n"
-		" srl %0,28\n"
-		: "=d" (ccode) : "a" (physpage) : "cc" );
-	return ccode & 2;
+	pgste_t pgste;
+
+	if (pte_same(*ptep, entry))
+		return 0;
+	if (mm_has_pgste(vma->vm_mm))
+		pgste = pgste_get_lock(ptep);
+
+	__ptep_ipte(address, ptep);
+	*ptep = entry;
+
+	if (mm_has_pgste(vma->vm_mm))
+		pgste_set_unlock(ptep, pgste);
+	return 1;
 }
 
 /*