aboutsummaryrefslogtreecommitdiffstats
path: root/arch/s390/include/asm/pgtable.h
diff options
context:
space:
mode:
authorMartin Schwidefsky <schwidefsky@de.ibm.com>2013-07-23 16:11:42 -0400
committerMartin Schwidefsky <schwidefsky@de.ibm.com>2013-08-29 07:20:11 -0400
commit0944fe3f4a323f436180d39402cae7f9c46ead17 (patch)
tree7b2ada69ff7e3c1fae20ec0b1dffe5e0d0ec2cc6 /arch/s390/include/asm/pgtable.h
parentfbd70035fb2b03deb346052084794bc1d0e25aa2 (diff)
s390/mm: implement software referenced bits
The last remaining use for the storage key of the s390 architecture is reference counting. The alternative is to make page table entries invalid while they are old. On access the fault handler marks the pte/pmd as young which makes the pte/pmd valid if the access rights allow read access. The pte/pmd invalidations required for software managed reference bits cost a bit of performance, on the other hand the RRBE/RRBM instructions to read and reset the referenced bits are quite expensive as well.

Reviewed-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Diffstat (limited to 'arch/s390/include/asm/pgtable.h')
-rw-r--r-- arch/s390/include/asm/pgtable.h | 345
1 file changed, 197 insertions(+), 148 deletions(-)
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 1d144b6f3ba2..9f215b40109e 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -226,8 +226,9 @@ extern unsigned long MODULES_END;
226#define _PAGE_TYPE 0x002 /* SW pte type bit */ 226#define _PAGE_TYPE 0x002 /* SW pte type bit */
227#define _PAGE_YOUNG 0x004 /* SW pte young bit */ 227#define _PAGE_YOUNG 0x004 /* SW pte young bit */
228#define _PAGE_DIRTY 0x008 /* SW pte dirty bit */ 228#define _PAGE_DIRTY 0x008 /* SW pte dirty bit */
229#define _PAGE_WRITE 0x010 /* SW pte write bit */ 229#define _PAGE_READ 0x010 /* SW pte read bit */
230#define _PAGE_SPECIAL 0x020 /* SW associated with special page */ 230#define _PAGE_WRITE 0x020 /* SW pte write bit */
231#define _PAGE_SPECIAL 0x040 /* SW associated with special page */
231#define __HAVE_ARCH_PTE_SPECIAL 232#define __HAVE_ARCH_PTE_SPECIAL
232 233
233/* Set of bits not changed in pte_modify */ 234/* Set of bits not changed in pte_modify */
@@ -243,19 +244,25 @@ extern unsigned long MODULES_END;
243 * The following table gives the different possible bit combinations for 244 * The following table gives the different possible bit combinations for
244 * the pte hardware and software bits in the last 12 bits of a pte: 245 * the pte hardware and software bits in the last 12 bits of a pte:
245 * 246 *
246 * 842100000000 247 * 842100000000
247 * 000084210000 248 * 000084210000
248 * 000000008421 249 * 000000008421
249 * .IR....wdytp 250 * .IR...wrdytp
250 * empty .10....00000 251 * empty .10...000000
251 * swap .10....xxx10 252 * swap .10...xxxx10
252 * file .11....xxxx0 253 * file .11...xxxxx0
253 * prot-none, clean .11....00x01 254 * prot-none, clean, old .11...000001
254 * prot-none, dirty .10....01x01 255 * prot-none, clean, young .11...000101
255 * read-only, clean .01....00x01 256 * prot-none, dirty, old .10...001001
256 * read-only, dirty .01....01x01 257 * prot-none, dirty, young .10...001101
257 * read-write, clean .01....10x01 258 * read-only, clean, old .11...010001
258 * read-write, dirty .00....11x01 259 * read-only, clean, young .01...010101
260 * read-only, dirty, old .11...011001
261 * read-only, dirty, young .01...011101
262 * read-write, clean, old .11...110001
263 * read-write, clean, young .01...110101
264 * read-write, dirty, old .10...111001
265 * read-write, dirty, young .00...111101
259 * 266 *
260 * pte_present is true for the bit pattern .xx...xxxxx1, (pte & 0x001) == 0x001 267 * pte_present is true for the bit pattern .xx...xxxxx1, (pte & 0x001) == 0x001
261 * pte_none is true for the bit pattern .10...xxxx00, (pte & 0x603) == 0x400 268 * pte_none is true for the bit pattern .10...xxxx00, (pte & 0x603) == 0x400
@@ -273,15 +280,26 @@ extern unsigned long MODULES_END;
273#define _ASCE_TABLE_LENGTH 0x7f /* 128 x 64 entries = 8k */ 280#define _ASCE_TABLE_LENGTH 0x7f /* 128 x 64 entries = 8k */
274 281
275/* Bits in the segment table entry */ 282/* Bits in the segment table entry */
283#define _SEGMENT_ENTRY_BITS 0x7fffffffUL /* Valid segment table bits */
276#define _SEGMENT_ENTRY_ORIGIN 0x7fffffc0UL /* page table origin */ 284#define _SEGMENT_ENTRY_ORIGIN 0x7fffffc0UL /* page table origin */
277#define _SEGMENT_ENTRY_PROTECT 0x200 /* page protection bit */ 285#define _SEGMENT_ENTRY_PROTECT 0x200 /* page protection bit */
278#define _SEGMENT_ENTRY_INVALID 0x20 /* invalid segment table entry */ 286#define _SEGMENT_ENTRY_INVALID 0x20 /* invalid segment table entry */
279#define _SEGMENT_ENTRY_COMMON 0x10 /* common segment bit */ 287#define _SEGMENT_ENTRY_COMMON 0x10 /* common segment bit */
280#define _SEGMENT_ENTRY_PTL 0x0f /* page table length */ 288#define _SEGMENT_ENTRY_PTL 0x0f /* page table length */
289#define _SEGMENT_ENTRY_NONE _SEGMENT_ENTRY_PROTECT
281 290
282#define _SEGMENT_ENTRY (_SEGMENT_ENTRY_PTL) 291#define _SEGMENT_ENTRY (_SEGMENT_ENTRY_PTL)
283#define _SEGMENT_ENTRY_EMPTY (_SEGMENT_ENTRY_INVALID) 292#define _SEGMENT_ENTRY_EMPTY (_SEGMENT_ENTRY_INVALID)
284 293
294/*
295 * Segment table entry encoding (I = invalid, R = read-only bit):
296 * ..R...I.....
297 * prot-none ..1...1.....
298 * read-only ..1...0.....
299 * read-write ..0...0.....
300 * empty ..0...1.....
301 */
302
285/* Page status table bits for virtualization */ 303/* Page status table bits for virtualization */
286#define PGSTE_ACC_BITS 0xf0000000UL 304#define PGSTE_ACC_BITS 0xf0000000UL
287#define PGSTE_FP_BIT 0x08000000UL 305#define PGSTE_FP_BIT 0x08000000UL
@@ -290,9 +308,7 @@ extern unsigned long MODULES_END;
290#define PGSTE_HC_BIT 0x00200000UL 308#define PGSTE_HC_BIT 0x00200000UL
291#define PGSTE_GR_BIT 0x00040000UL 309#define PGSTE_GR_BIT 0x00040000UL
292#define PGSTE_GC_BIT 0x00020000UL 310#define PGSTE_GC_BIT 0x00020000UL
293#define PGSTE_UR_BIT 0x00008000UL 311#define PGSTE_IN_BIT 0x00008000UL /* IPTE notify bit */
294#define PGSTE_UC_BIT 0x00004000UL /* user dirty (migration) */
295#define PGSTE_IN_BIT 0x00002000UL /* IPTE notify bit */
296 312
297#else /* CONFIG_64BIT */ 313#else /* CONFIG_64BIT */
298 314
@@ -331,6 +347,8 @@ extern unsigned long MODULES_END;
331#define _REGION3_ENTRY_CO 0x100 /* change-recording override */ 347#define _REGION3_ENTRY_CO 0x100 /* change-recording override */
332 348
333/* Bits in the segment table entry */ 349/* Bits in the segment table entry */
350#define _SEGMENT_ENTRY_BITS 0xfffffffffffffe33UL
351#define _SEGMENT_ENTRY_BITS_LARGE 0xfffffffffff1ff33UL
334#define _SEGMENT_ENTRY_ORIGIN_LARGE ~0xfffffUL /* large page address */ 352#define _SEGMENT_ENTRY_ORIGIN_LARGE ~0xfffffUL /* large page address */
335#define _SEGMENT_ENTRY_ORIGIN ~0x7ffUL/* segment table origin */ 353#define _SEGMENT_ENTRY_ORIGIN ~0x7ffUL/* segment table origin */
336#define _SEGMENT_ENTRY_PROTECT 0x200 /* page protection bit */ 354#define _SEGMENT_ENTRY_PROTECT 0x200 /* page protection bit */
@@ -342,6 +360,21 @@ extern unsigned long MODULES_END;
342#define _SEGMENT_ENTRY_LARGE 0x400 /* STE-format control, large page */ 360#define _SEGMENT_ENTRY_LARGE 0x400 /* STE-format control, large page */
343#define _SEGMENT_ENTRY_CO 0x100 /* change-recording override */ 361#define _SEGMENT_ENTRY_CO 0x100 /* change-recording override */
344#define _SEGMENT_ENTRY_SPLIT 0x001 /* THP splitting bit */ 362#define _SEGMENT_ENTRY_SPLIT 0x001 /* THP splitting bit */
363#define _SEGMENT_ENTRY_YOUNG 0x002 /* SW segment young bit */
364#define _SEGMENT_ENTRY_NONE _SEGMENT_ENTRY_YOUNG
365
366/*
367 * Segment table entry encoding (R = read-only, I = invalid, y = young bit):
368 * ..R...I...y.
369 * prot-none, old ..0...1...1.
370 * prot-none, young ..1...1...1.
371 * read-only, old ..1...1...0.
372 * read-only, young ..1...0...1.
373 * read-write, old ..0...1...0.
374 * read-write, young ..0...0...1.
375 * The segment table origin is used to distinguish empty (origin==0) from
376 * read-write, old segment table entries (origin!=0)
377 */
345 378
346#define _SEGMENT_ENTRY_SPLIT_BIT 0 /* THP splitting bit number */ 379#define _SEGMENT_ENTRY_SPLIT_BIT 0 /* THP splitting bit number */
347 380
@@ -357,9 +390,7 @@ extern unsigned long MODULES_END;
357#define PGSTE_HC_BIT 0x0020000000000000UL 390#define PGSTE_HC_BIT 0x0020000000000000UL
358#define PGSTE_GR_BIT 0x0004000000000000UL 391#define PGSTE_GR_BIT 0x0004000000000000UL
359#define PGSTE_GC_BIT 0x0002000000000000UL 392#define PGSTE_GC_BIT 0x0002000000000000UL
360#define PGSTE_UR_BIT 0x0000800000000000UL 393#define PGSTE_IN_BIT 0x0000800000000000UL /* IPTE notify bit */
361#define PGSTE_UC_BIT 0x0000400000000000UL /* user dirty (migration) */
362#define PGSTE_IN_BIT 0x0000200000000000UL /* IPTE notify bit */
363 394
364#endif /* CONFIG_64BIT */ 395#endif /* CONFIG_64BIT */
365 396
@@ -375,12 +406,17 @@ extern unsigned long MODULES_END;
375 * Page protection definitions. 406 * Page protection definitions.
376 */ 407 */
377#define PAGE_NONE __pgprot(_PAGE_PRESENT | _PAGE_INVALID) 408#define PAGE_NONE __pgprot(_PAGE_PRESENT | _PAGE_INVALID)
378#define PAGE_READ __pgprot(_PAGE_PRESENT | _PAGE_PROTECT) 409#define PAGE_READ __pgprot(_PAGE_PRESENT | _PAGE_READ | \
379#define PAGE_WRITE __pgprot(_PAGE_PRESENT | _PAGE_WRITE | _PAGE_PROTECT) 410 _PAGE_INVALID | _PAGE_PROTECT)
380 411#define PAGE_WRITE __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
381#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_WRITE | _PAGE_DIRTY) 412 _PAGE_INVALID | _PAGE_PROTECT)
382#define PAGE_KERNEL __pgprot(_PAGE_PRESENT | _PAGE_WRITE | _PAGE_DIRTY) 413
383#define PAGE_KERNEL_RO __pgprot(_PAGE_PRESENT | _PAGE_PROTECT) 414#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
415 _PAGE_YOUNG | _PAGE_DIRTY)
416#define PAGE_KERNEL __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
417 _PAGE_YOUNG | _PAGE_DIRTY)
418#define PAGE_KERNEL_RO __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_YOUNG | \
419 _PAGE_PROTECT)
384 420
385/* 421/*
386 * On s390 the page table entry has an invalid bit and a read-only bit. 422 * On s390 the page table entry has an invalid bit and a read-only bit.
@@ -410,9 +446,10 @@ extern unsigned long MODULES_END;
410 * Segment entry (large page) protection definitions. 446 * Segment entry (large page) protection definitions.
411 */ 447 */
412#define SEGMENT_NONE __pgprot(_SEGMENT_ENTRY_INVALID | \ 448#define SEGMENT_NONE __pgprot(_SEGMENT_ENTRY_INVALID | \
449 _SEGMENT_ENTRY_NONE)
450#define SEGMENT_READ __pgprot(_SEGMENT_ENTRY_INVALID | \
413 _SEGMENT_ENTRY_PROTECT) 451 _SEGMENT_ENTRY_PROTECT)
414#define SEGMENT_READ __pgprot(_SEGMENT_ENTRY_PROTECT) 452#define SEGMENT_WRITE __pgprot(_SEGMENT_ENTRY_INVALID)
415#define SEGMENT_WRITE __pgprot(0)
416 453
417static inline int mm_has_pgste(struct mm_struct *mm) 454static inline int mm_has_pgste(struct mm_struct *mm)
418{ 455{
@@ -520,10 +557,19 @@ static inline int pmd_large(pmd_t pmd)
520#endif 557#endif
521} 558}
522 559
560static inline int pmd_prot_none(pmd_t pmd)
561{
562 return (pmd_val(pmd) & _SEGMENT_ENTRY_INVALID) &&
563 (pmd_val(pmd) & _SEGMENT_ENTRY_NONE);
564}
565
523static inline int pmd_bad(pmd_t pmd) 566static inline int pmd_bad(pmd_t pmd)
524{ 567{
525 unsigned long mask = ~_SEGMENT_ENTRY_ORIGIN & ~_SEGMENT_ENTRY_INVALID; 568#ifdef CONFIG_64BIT
526 return (pmd_val(pmd) & mask) != _SEGMENT_ENTRY; 569 if (pmd_large(pmd))
570 return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS_LARGE) != 0;
571#endif
572 return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS) != 0;
527} 573}
528 574
529#define __HAVE_ARCH_PMDP_SPLITTING_FLUSH 575#define __HAVE_ARCH_PMDP_SPLITTING_FLUSH
@@ -542,12 +588,21 @@ extern int pmdp_clear_flush_young(struct vm_area_struct *vma,
542#define __HAVE_ARCH_PMD_WRITE 588#define __HAVE_ARCH_PMD_WRITE
543static inline int pmd_write(pmd_t pmd) 589static inline int pmd_write(pmd_t pmd)
544{ 590{
591 if (pmd_prot_none(pmd))
592 return 0;
545 return (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT) == 0; 593 return (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT) == 0;
546} 594}
547 595
548static inline int pmd_young(pmd_t pmd) 596static inline int pmd_young(pmd_t pmd)
549{ 597{
550 return 0; 598 int young = 0;
599#ifdef CONFIG_64BIT
600 if (pmd_prot_none(pmd))
601 young = (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT) != 0;
602 else
603 young = (pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG) != 0;
604#endif
605 return young;
551} 606}
552 607
553static inline int pte_present(pte_t pte) 608static inline int pte_present(pte_t pte)
@@ -632,33 +687,28 @@ static inline void pgste_set(pte_t *ptep, pgste_t pgste)
632static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste) 687static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste)
633{ 688{
634#ifdef CONFIG_PGSTE 689#ifdef CONFIG_PGSTE
635 unsigned long address, bits; 690 unsigned long address, bits, skey;
636 unsigned char skey;
637 691
638 if (pte_val(*ptep) & _PAGE_INVALID) 692 if (pte_val(*ptep) & _PAGE_INVALID)
639 return pgste; 693 return pgste;
640 address = pte_val(*ptep) & PAGE_MASK; 694 address = pte_val(*ptep) & PAGE_MASK;
641 skey = page_get_storage_key(address); 695 skey = (unsigned long) page_get_storage_key(address);
642 bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED); 696 bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
643 /* Clear page changed & referenced bit in the storage key */ 697 if (!(pgste_val(pgste) & PGSTE_HC_BIT) && (bits & _PAGE_CHANGED)) {
644 if (bits & _PAGE_CHANGED) 698 /* Transfer dirty + referenced bit to host bits in pgste */
699 pgste_val(pgste) |= bits << 52;
645 page_set_storage_key(address, skey ^ bits, 0); 700 page_set_storage_key(address, skey ^ bits, 0);
646 else if (bits) 701 } else if (!(pgste_val(pgste) & PGSTE_HR_BIT) &&
702 (bits & _PAGE_REFERENCED)) {
703 /* Transfer referenced bit to host bit in pgste */
704 pgste_val(pgste) |= PGSTE_HR_BIT;
647 page_reset_referenced(address); 705 page_reset_referenced(address);
706 }
648 /* Transfer page changed & referenced bit to guest bits in pgste */ 707 /* Transfer page changed & referenced bit to guest bits in pgste */
649 pgste_val(pgste) |= bits << 48; /* GR bit & GC bit */ 708 pgste_val(pgste) |= bits << 48; /* GR bit & GC bit */
650 /* Get host changed & referenced bits from pgste */
651 bits |= (pgste_val(pgste) & (PGSTE_HR_BIT | PGSTE_HC_BIT)) >> 52;
652 /* Transfer page changed & referenced bit to kvm user bits */
653 pgste_val(pgste) |= bits << 45; /* PGSTE_UR_BIT & PGSTE_UC_BIT */
654 /* Clear relevant host bits in pgste. */
655 pgste_val(pgste) &= ~(PGSTE_HR_BIT | PGSTE_HC_BIT);
656 pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT);
657 /* Copy page access key and fetch protection bit to pgste */ 709 /* Copy page access key and fetch protection bit to pgste */
658 pgste_val(pgste) |= 710 pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT);
659 (unsigned long) (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56; 711 pgste_val(pgste) |= (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
660 /* Transfer referenced bit to pte */
661 pte_val(*ptep) |= (bits & _PAGE_REFERENCED) << 1;
662#endif 712#endif
663 return pgste; 713 return pgste;
664 714
@@ -667,24 +717,11 @@ static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste)
667static inline pgste_t pgste_update_young(pte_t *ptep, pgste_t pgste) 717static inline pgste_t pgste_update_young(pte_t *ptep, pgste_t pgste)
668{ 718{
669#ifdef CONFIG_PGSTE 719#ifdef CONFIG_PGSTE
670 int young;
671
672 if (pte_val(*ptep) & _PAGE_INVALID) 720 if (pte_val(*ptep) & _PAGE_INVALID)
673 return pgste; 721 return pgste;
674 /* Get referenced bit from storage key */ 722 /* Get referenced bit from storage key */
675 young = page_reset_referenced(pte_val(*ptep) & PAGE_MASK); 723 if (page_reset_referenced(pte_val(*ptep) & PAGE_MASK))
676 if (young) 724 pgste_val(pgste) |= PGSTE_HR_BIT | PGSTE_GR_BIT;
677 pgste_val(pgste) |= PGSTE_GR_BIT;
678 /* Get host referenced bit from pgste */
679 if (pgste_val(pgste) & PGSTE_HR_BIT) {
680 pgste_val(pgste) &= ~PGSTE_HR_BIT;
681 young = 1;
682 }
683 /* Transfer referenced bit to kvm user bits and pte */
684 if (young) {
685 pgste_val(pgste) |= PGSTE_UR_BIT;
686 pte_val(*ptep) |= _PAGE_YOUNG;
687 }
688#endif 725#endif
689 return pgste; 726 return pgste;
690} 727}
@@ -839,11 +876,7 @@ static inline int pte_dirty(pte_t pte)
839 876
840static inline int pte_young(pte_t pte) 877static inline int pte_young(pte_t pte)
841{ 878{
842#ifdef CONFIG_PGSTE 879 return (pte_val(pte) & _PAGE_YOUNG) != 0;
843 if (pte_val(pte) & _PAGE_YOUNG)
844 return 1;
845#endif
846 return 0;
847} 880}
848 881
849/* 882/*
@@ -884,6 +917,16 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
884{ 917{
885 pte_val(pte) &= _PAGE_CHG_MASK; 918 pte_val(pte) &= _PAGE_CHG_MASK;
886 pte_val(pte) |= pgprot_val(newprot); 919 pte_val(pte) |= pgprot_val(newprot);
920 /*
921 * newprot for PAGE_NONE, PAGE_READ and PAGE_WRITE has the
922 * invalid bit set, clear it again for readable, young pages
923 */
924 if ((pte_val(pte) & _PAGE_YOUNG) && (pte_val(pte) & _PAGE_READ))
925 pte_val(pte) &= ~_PAGE_INVALID;
926 /*
927 * newprot for PAGE_READ and PAGE_WRITE has the page protection
928 * bit set, clear it again for writable, dirty pages
929 */
887 if ((pte_val(pte) & _PAGE_DIRTY) && (pte_val(pte) & _PAGE_WRITE)) 930 if ((pte_val(pte) & _PAGE_DIRTY) && (pte_val(pte) & _PAGE_WRITE))
888 pte_val(pte) &= ~_PAGE_PROTECT; 931 pte_val(pte) &= ~_PAGE_PROTECT;
889 return pte; 932 return pte;
@@ -921,14 +964,16 @@ static inline pte_t pte_mkdirty(pte_t pte)
921 964
922static inline pte_t pte_mkold(pte_t pte) 965static inline pte_t pte_mkold(pte_t pte)
923{ 966{
924#ifdef CONFIG_PGSTE
925 pte_val(pte) &= ~_PAGE_YOUNG; 967 pte_val(pte) &= ~_PAGE_YOUNG;
926#endif 968 pte_val(pte) |= _PAGE_INVALID;
927 return pte; 969 return pte;
928} 970}
929 971
930static inline pte_t pte_mkyoung(pte_t pte) 972static inline pte_t pte_mkyoung(pte_t pte)
931{ 973{
974 pte_val(pte) |= _PAGE_YOUNG;
975 if (pte_val(pte) & _PAGE_READ)
976 pte_val(pte) &= ~_PAGE_INVALID;
932 return pte; 977 return pte;
933} 978}
934 979
@@ -958,8 +1003,8 @@ static inline int ptep_test_and_clear_user_dirty(struct mm_struct *mm,
958 if (mm_has_pgste(mm)) { 1003 if (mm_has_pgste(mm)) {
959 pgste = pgste_get_lock(ptep); 1004 pgste = pgste_get_lock(ptep);
960 pgste = pgste_update_all(ptep, pgste); 1005 pgste = pgste_update_all(ptep, pgste);
961 dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT); 1006 dirty = !!(pgste_val(pgste) & PGSTE_HC_BIT);
962 pgste_val(pgste) &= ~PGSTE_UC_BIT; 1007 pgste_val(pgste) &= ~PGSTE_HC_BIT;
963 pgste_set_unlock(ptep, pgste); 1008 pgste_set_unlock(ptep, pgste);
964 return dirty; 1009 return dirty;
965 } 1010 }
@@ -978,42 +1023,13 @@ static inline int ptep_test_and_clear_user_young(struct mm_struct *mm,
978 if (mm_has_pgste(mm)) { 1023 if (mm_has_pgste(mm)) {
979 pgste = pgste_get_lock(ptep); 1024 pgste = pgste_get_lock(ptep);
980 pgste = pgste_update_young(ptep, pgste); 1025 pgste = pgste_update_young(ptep, pgste);
981 young = !!(pgste_val(pgste) & PGSTE_UR_BIT); 1026 young = !!(pgste_val(pgste) & PGSTE_HR_BIT);
982 pgste_val(pgste) &= ~PGSTE_UR_BIT; 1027 pgste_val(pgste) &= ~PGSTE_HR_BIT;
983 pgste_set_unlock(ptep, pgste); 1028 pgste_set_unlock(ptep, pgste);
984 } 1029 }
985 return young; 1030 return young;
986} 1031}
987 1032
988#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
989static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
990 unsigned long addr, pte_t *ptep)
991{
992 pgste_t pgste;
993 pte_t pte;
994
995 if (mm_has_pgste(vma->vm_mm)) {
996 pgste = pgste_get_lock(ptep);
997 pgste = pgste_update_young(ptep, pgste);
998 pte = *ptep;
999 *ptep = pte_mkold(pte);
1000 pgste_set_unlock(ptep, pgste);
1001 return pte_young(pte);
1002 }
1003 return 0;
1004}
1005
1006#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
1007static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
1008 unsigned long address, pte_t *ptep)
1009{
1010 /* No need to flush TLB
1011 * On s390 reference bits are in storage key and never in TLB
1012 * With virtualization we handle the reference bit, without we
1013 * we can simply return */
1014 return ptep_test_and_clear_young(vma, address, ptep);
1015}
1016
1017static inline void __ptep_ipte(unsigned long address, pte_t *ptep) 1033static inline void __ptep_ipte(unsigned long address, pte_t *ptep)
1018{ 1034{
1019 if (!(pte_val(*ptep) & _PAGE_INVALID)) { 1035 if (!(pte_val(*ptep) & _PAGE_INVALID)) {
@@ -1042,6 +1058,40 @@ static inline void ptep_flush_lazy(struct mm_struct *mm,
1042 mm->context.flush_mm = 1; 1058 mm->context.flush_mm = 1;
1043} 1059}
1044 1060
1061#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
1062static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
1063 unsigned long addr, pte_t *ptep)
1064{
1065 pgste_t pgste;
1066 pte_t pte;
1067 int young;
1068
1069 if (mm_has_pgste(vma->vm_mm)) {
1070 pgste = pgste_get_lock(ptep);
1071 pgste = pgste_ipte_notify(vma->vm_mm, addr, ptep, pgste);
1072 }
1073
1074 pte = *ptep;
1075 __ptep_ipte(addr, ptep);
1076 young = pte_young(pte);
1077 pte = pte_mkold(pte);
1078
1079 if (mm_has_pgste(vma->vm_mm)) {
1080 pgste_set_pte(ptep, pte);
1081 pgste_set_unlock(ptep, pgste);
1082 } else
1083 *ptep = pte;
1084
1085 return young;
1086}
1087
1088#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
1089static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
1090 unsigned long address, pte_t *ptep)
1091{
1092 return ptep_test_and_clear_young(vma, address, ptep);
1093}
1094
1045/* 1095/*
1046 * This is hard to understand. ptep_get_and_clear and ptep_clear_flush 1096 * This is hard to understand. ptep_get_and_clear and ptep_clear_flush
1047 * both clear the TLB for the unmapped pte. The reason is that 1097 * both clear the TLB for the unmapped pte. The reason is that
@@ -1229,7 +1279,7 @@ static inline pte_t mk_pte_phys(unsigned long physpage, pgprot_t pgprot)
1229{ 1279{
1230 pte_t __pte; 1280 pte_t __pte;
1231 pte_val(__pte) = physpage + pgprot_val(pgprot); 1281 pte_val(__pte) = physpage + pgprot_val(pgprot);
1232 return __pte; 1282 return pte_mkyoung(__pte);
1233} 1283}
1234 1284
1235static inline pte_t mk_pte(struct page *page, pgprot_t pgprot) 1285static inline pte_t mk_pte(struct page *page, pgprot_t pgprot)
@@ -1338,10 +1388,41 @@ static inline unsigned long massage_pgprot_pmd(pgprot_t pgprot)
1338 return pgprot_val(SEGMENT_WRITE); 1388 return pgprot_val(SEGMENT_WRITE);
1339} 1389}
1340 1390
1391static inline pmd_t pmd_mkyoung(pmd_t pmd)
1392{
1393#ifdef CONFIG_64BIT
1394 if (pmd_prot_none(pmd)) {
1395 pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT;
1396 } else {
1397 pmd_val(pmd) |= _SEGMENT_ENTRY_YOUNG;
1398 pmd_val(pmd) &= ~_SEGMENT_ENTRY_INVALID;
1399 }
1400#endif
1401 return pmd;
1402}
1403
1404static inline pmd_t pmd_mkold(pmd_t pmd)
1405{
1406#ifdef CONFIG_64BIT
1407 if (pmd_prot_none(pmd)) {
1408 pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT;
1409 } else {
1410 pmd_val(pmd) &= ~_SEGMENT_ENTRY_YOUNG;
1411 pmd_val(pmd) |= _SEGMENT_ENTRY_INVALID;
1412 }
1413#endif
1414 return pmd;
1415}
1416
1341static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) 1417static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
1342{ 1418{
1419 int young;
1420
1421 young = pmd_young(pmd);
1343 pmd_val(pmd) &= _SEGMENT_CHG_MASK; 1422 pmd_val(pmd) &= _SEGMENT_CHG_MASK;
1344 pmd_val(pmd) |= massage_pgprot_pmd(newprot); 1423 pmd_val(pmd) |= massage_pgprot_pmd(newprot);
1424 if (young)
1425 pmd = pmd_mkyoung(pmd);
1345 return pmd; 1426 return pmd;
1346} 1427}
1347 1428
@@ -1349,13 +1430,13 @@ static inline pmd_t mk_pmd_phys(unsigned long physpage, pgprot_t pgprot)
1349{ 1430{
1350 pmd_t __pmd; 1431 pmd_t __pmd;
1351 pmd_val(__pmd) = physpage + massage_pgprot_pmd(pgprot); 1432 pmd_val(__pmd) = physpage + massage_pgprot_pmd(pgprot);
1352 return __pmd; 1433 return pmd_mkyoung(__pmd);
1353} 1434}
1354 1435
1355static inline pmd_t pmd_mkwrite(pmd_t pmd) 1436static inline pmd_t pmd_mkwrite(pmd_t pmd)
1356{ 1437{
1357 /* Do not clobber PROT_NONE pages! */ 1438 /* Do not clobber PROT_NONE segments! */
1358 if (!(pmd_val(pmd) & _SEGMENT_ENTRY_INVALID)) 1439 if (!pmd_prot_none(pmd))
1359 pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT; 1440 pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT;
1360 return pmd; 1441 return pmd;
1361} 1442}
@@ -1391,7 +1472,9 @@ static inline pmd_t pmd_mkhuge(pmd_t pmd)
1391 1472
1392static inline pmd_t pmd_wrprotect(pmd_t pmd) 1473static inline pmd_t pmd_wrprotect(pmd_t pmd)
1393{ 1474{
1394 pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT; 1475 /* Do not clobber PROT_NONE segments! */
1476 if (!pmd_prot_none(pmd))
1477 pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT;
1395 return pmd; 1478 return pmd;
1396} 1479}
1397 1480
@@ -1401,50 +1484,16 @@ static inline pmd_t pmd_mkdirty(pmd_t pmd)
1401 return pmd; 1484 return pmd;
1402} 1485}
1403 1486
1404static inline pmd_t pmd_mkold(pmd_t pmd)
1405{
1406 /* No referenced bit in the segment table entry. */
1407 return pmd;
1408}
1409
1410static inline pmd_t pmd_mkyoung(pmd_t pmd)
1411{
1412 /* No referenced bit in the segment table entry. */
1413 return pmd;
1414}
1415
1416#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG 1487#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
1417static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma, 1488static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
1418 unsigned long address, pmd_t *pmdp) 1489 unsigned long address, pmd_t *pmdp)
1419{ 1490{
1420 unsigned long pmd_addr = pmd_val(*pmdp) & HPAGE_MASK; 1491 pmd_t pmd;
1421 long tmp, rc;
1422 int counter;
1423 1492
1424 rc = 0; 1493 pmd = *pmdp;
1425 if (MACHINE_HAS_RRBM) { 1494 __pmd_idte(address, pmdp);
1426 counter = PTRS_PER_PTE >> 6; 1495 *pmdp = pmd_mkold(pmd);
1427 asm volatile( 1496 return pmd_young(pmd);
1428 "0: .insn rre,0xb9ae0000,%0,%3\n" /* rrbm */
1429 " ogr %1,%0\n"
1430 " la %3,0(%4,%3)\n"
1431 " brct %2,0b\n"
1432 : "=&d" (tmp), "+&d" (rc), "+d" (counter),
1433 "+a" (pmd_addr)
1434 : "a" (64 * 4096UL) : "cc");
1435 rc = !!rc;
1436 } else {
1437 counter = PTRS_PER_PTE;
1438 asm volatile(
1439 "0: rrbe 0,%2\n"
1440 " la %2,0(%3,%2)\n"
1441 " brc 12,1f\n"
1442 " lhi %0,1\n"
1443 "1: brct %1,0b\n"
1444 : "+d" (rc), "+d" (counter), "+a" (pmd_addr)
1445 : "a" (4096UL) : "cc");
1446 }
1447 return rc;
1448} 1497}
1449 1498
1450#define __HAVE_ARCH_PMDP_GET_AND_CLEAR 1499#define __HAVE_ARCH_PMDP_GET_AND_CLEAR