aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
author Martin Schwidefsky <schwidefsky@de.ibm.com> 2011-05-23 04:24:40 -0400
committer Martin Schwidefsky <schwidefsky@de.ibm.com> 2011-05-23 04:24:31 -0400
commit b2fa47e6bf5148aa6dbf22ec79f18141b421eeba (patch)
tree 8786785aefa2fbbc33fa590ac4b9a58947c2aac8 /arch
parent 2d42552d1c1659b014851cf449ad2fe458509128 (diff)
[S390] refactor page table functions for better pgste support
Rework the architecture page table functions to access the bits in the page table extension array (pgste). There are a number of changes: 1) Fix missing pgste update if the attach_count for the mm is <= 1. 2) For every operation that affects the invalid bit in the pte or the rcp byte in the pgste the pcl lock needs to be acquired. The function pgste_get_lock gets the pcl lock and returns the current pgste value for a pte pointer. The function pgste_set_unlock stores the pgste and releases the lock. Between these two calls the bits in the pgste can be shuffled. 3) Define two software bits in the pte, _PAGE_SWR and _PAGE_SWC, to avoid calling SetPageDirty and SetPageReferenced from pgtable.h. If the host reference backup bit or the host change backup bit has been set, the dirty/referenced state is transferred to the pte. The common code will pick up the state from the pte. 4) Add ptep_modify_prot_start and ptep_modify_prot_commit for mprotect. 5) Remove pgd_populate_kernel, pud_populate_kernel, pmd_populate_kernel, pgd_clear_kernel, pud_clear_kernel, pmd_clear_kernel and ptep_invalidate. 6) Rename kvm_s390_test_and_clear_page_dirty to ptep_test_and_clear_user_dirty and add ptep_test_and_clear_user_young. 7) Define mm_exclusive() and mm_has_pgste() helpers to improve readability. Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Diffstat (limited to 'arch')
-rw-r--r-- arch/s390/include/asm/mmu.h 6
-rw-r--r-- arch/s390/include/asm/page.h 4
-rw-r--r-- arch/s390/include/asm/pgalloc.h 29
-rw-r--r-- arch/s390/include/asm/pgtable.h 506
-rw-r--r-- arch/s390/mm/init.c 3
-rw-r--r-- arch/s390/mm/pageattr.c 2
-rw-r--r-- arch/s390/mm/vmem.c 14
7 files changed, 331 insertions, 233 deletions
diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h
index 818e8298a6bd..82d0847896a0 100644
--- a/arch/s390/include/asm/mmu.h
+++ b/arch/s390/include/asm/mmu.h
@@ -9,8 +9,10 @@ typedef struct {
9 unsigned long asce_bits; 9 unsigned long asce_bits;
10 unsigned long asce_limit; 10 unsigned long asce_limit;
11 unsigned long vdso_base; 11 unsigned long vdso_base;
12 int has_pgste; /* The mmu context has extended page tables */ 12 /* Cloned contexts will be created with extended page tables. */
13 int alloc_pgste; /* cloned contexts will have extended page tables */ 13 unsigned int alloc_pgste:1;
14 /* The mmu context has extended page tables. */
15 unsigned int has_pgste:1;
14} mm_context_t; 16} mm_context_t;
15 17
16#define INIT_MM_CONTEXT(name) \ 18#define INIT_MM_CONTEXT(name) \
diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
index 81ee2776088d..accb372ddc7e 100644
--- a/arch/s390/include/asm/page.h
+++ b/arch/s390/include/asm/page.h
@@ -90,6 +90,7 @@ static inline void copy_page(void *to, void *from)
90 */ 90 */
91 91
92typedef struct { unsigned long pgprot; } pgprot_t; 92typedef struct { unsigned long pgprot; } pgprot_t;
93typedef struct { unsigned long pgste; } pgste_t;
93typedef struct { unsigned long pte; } pte_t; 94typedef struct { unsigned long pte; } pte_t;
94typedef struct { unsigned long pmd; } pmd_t; 95typedef struct { unsigned long pmd; } pmd_t;
95typedef struct { unsigned long pud; } pud_t; 96typedef struct { unsigned long pud; } pud_t;
@@ -97,13 +98,16 @@ typedef struct { unsigned long pgd; } pgd_t;
97typedef pte_t *pgtable_t; 98typedef pte_t *pgtable_t;
98 99
99#define pgprot_val(x) ((x).pgprot) 100#define pgprot_val(x) ((x).pgprot)
101#define pgste_val(x) ((x).pgste)
100#define pte_val(x) ((x).pte) 102#define pte_val(x) ((x).pte)
101#define pmd_val(x) ((x).pmd) 103#define pmd_val(x) ((x).pmd)
102#define pud_val(x) ((x).pud) 104#define pud_val(x) ((x).pud)
103#define pgd_val(x) ((x).pgd) 105#define pgd_val(x) ((x).pgd)
104 106
107#define __pgste(x) ((pgste_t) { (x) } )
105#define __pte(x) ((pte_t) { (x) } ) 108#define __pte(x) ((pte_t) { (x) } )
106#define __pmd(x) ((pmd_t) { (x) } ) 109#define __pmd(x) ((pmd_t) { (x) } )
110#define __pud(x) ((pud_t) { (x) } )
107#define __pgd(x) ((pgd_t) { (x) } ) 111#define __pgd(x) ((pgd_t) { (x) } )
108#define __pgprot(x) ((pgprot_t) { (x) } ) 112#define __pgprot(x) ((pgprot_t) { (x) } )
109 113
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
index 739ff9ec1395..f6314af3b354 100644
--- a/arch/s390/include/asm/pgalloc.h
+++ b/arch/s390/include/asm/pgalloc.h
@@ -65,10 +65,7 @@ static inline unsigned long pgd_entry_type(struct mm_struct *mm)
65#define pmd_free(mm, x) do { } while (0) 65#define pmd_free(mm, x) do { } while (0)
66 66
67#define pgd_populate(mm, pgd, pud) BUG() 67#define pgd_populate(mm, pgd, pud) BUG()
68#define pgd_populate_kernel(mm, pgd, pud) BUG()
69
70#define pud_populate(mm, pud, pmd) BUG() 68#define pud_populate(mm, pud, pmd) BUG()
71#define pud_populate_kernel(mm, pud, pmd) BUG()
72 69
73#else /* __s390x__ */ 70#else /* __s390x__ */
74 71
@@ -102,26 +99,14 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long vmaddr)
102} 99}
103#define pmd_free(mm, pmd) crst_table_free(mm, (unsigned long *) pmd) 100#define pmd_free(mm, pmd) crst_table_free(mm, (unsigned long *) pmd)
104 101
105static inline void pgd_populate_kernel(struct mm_struct *mm,
106 pgd_t *pgd, pud_t *pud)
107{
108 pgd_val(*pgd) = _REGION2_ENTRY | __pa(pud);
109}
110
111static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud) 102static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
112{ 103{
113 pgd_populate_kernel(mm, pgd, pud); 104 pgd_val(*pgd) = _REGION2_ENTRY | __pa(pud);
114}
115
116static inline void pud_populate_kernel(struct mm_struct *mm,
117 pud_t *pud, pmd_t *pmd)
118{
119 pud_val(*pud) = _REGION3_ENTRY | __pa(pmd);
120} 105}
121 106
122static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) 107static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
123{ 108{
124 pud_populate_kernel(mm, pud, pmd); 109 pud_val(*pud) = _REGION3_ENTRY | __pa(pmd);
125} 110}
126 111
127#endif /* __s390x__ */ 112#endif /* __s390x__ */
@@ -134,18 +119,14 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
134} 119}
135#define pgd_free(mm, pgd) crst_table_free(mm, (unsigned long *) pgd) 120#define pgd_free(mm, pgd) crst_table_free(mm, (unsigned long *) pgd)
136 121
137static inline void pmd_populate_kernel(struct mm_struct *mm,
138 pmd_t *pmd, pte_t *pte)
139{
140 pmd_val(*pmd) = _SEGMENT_ENTRY + __pa(pte);
141}
142
143static inline void pmd_populate(struct mm_struct *mm, 122static inline void pmd_populate(struct mm_struct *mm,
144 pmd_t *pmd, pgtable_t pte) 123 pmd_t *pmd, pgtable_t pte)
145{ 124{
146 pmd_populate_kernel(mm, pmd, pte); 125 pmd_val(*pmd) = _SEGMENT_ENTRY + __pa(pte);
147} 126}
148 127
128#define pmd_populate_kernel(mm, pmd, pte) pmd_populate(mm, pmd, pte)
129
149#define pmd_pgtable(pmd) \ 130#define pmd_pgtable(pmd) \
150 (pgtable_t)(pmd_val(pmd) & -sizeof(pte_t)*PTRS_PER_PTE) 131 (pgtable_t)(pmd_val(pmd) & -sizeof(pte_t)*PTRS_PER_PTE)
151 132
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 4ca4dd2b329a..c4773a2ef3d3 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -31,9 +31,8 @@
31#ifndef __ASSEMBLY__ 31#ifndef __ASSEMBLY__
32#include <linux/sched.h> 32#include <linux/sched.h>
33#include <linux/mm_types.h> 33#include <linux/mm_types.h>
34#include <asm/bitops.h>
35#include <asm/bug.h> 34#include <asm/bug.h>
36#include <asm/processor.h> 35#include <asm/page.h>
37 36
38extern pgd_t swapper_pg_dir[] __attribute__ ((aligned (4096))); 37extern pgd_t swapper_pg_dir[] __attribute__ ((aligned (4096)));
39extern void paging_init(void); 38extern void paging_init(void);
@@ -243,11 +242,13 @@ extern unsigned long VMALLOC_START;
243/* Software bits in the page table entry */ 242/* Software bits in the page table entry */
244#define _PAGE_SWT 0x001 /* SW pte type bit t */ 243#define _PAGE_SWT 0x001 /* SW pte type bit t */
245#define _PAGE_SWX 0x002 /* SW pte type bit x */ 244#define _PAGE_SWX 0x002 /* SW pte type bit x */
246#define _PAGE_SPECIAL 0x004 /* SW associated with special page */ 245#define _PAGE_SWC 0x004 /* SW pte changed bit (for KVM) */
246#define _PAGE_SWR 0x008 /* SW pte referenced bit (for KVM) */
247#define _PAGE_SPECIAL 0x010 /* SW associated with special page */
247#define __HAVE_ARCH_PTE_SPECIAL 248#define __HAVE_ARCH_PTE_SPECIAL
248 249
249/* Set of bits not changed in pte_modify */ 250/* Set of bits not changed in pte_modify */
250#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_SPECIAL) 251#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_SPECIAL | _PAGE_SWC | _PAGE_SWR)
251 252
252/* Six different types of pages. */ 253/* Six different types of pages. */
253#define _PAGE_TYPE_EMPTY 0x400 254#define _PAGE_TYPE_EMPTY 0x400
@@ -293,14 +294,17 @@ extern unsigned long VMALLOC_START;
293 */ 294 */
294 295
295/* Page status table bits for virtualization */ 296/* Page status table bits for virtualization */
296#define RCP_PCL_BIT 55 297#define RCP_ACC_BITS 0xf000000000000000UL
297#define RCP_HR_BIT 54 298#define RCP_FP_BIT 0x0800000000000000UL
298#define RCP_HC_BIT 53 299#define RCP_PCL_BIT 0x0080000000000000UL
299#define RCP_GR_BIT 50 300#define RCP_HR_BIT 0x0040000000000000UL
300#define RCP_GC_BIT 49 301#define RCP_HC_BIT 0x0020000000000000UL
301 302#define RCP_GR_BIT 0x0004000000000000UL
302/* User dirty bit for KVM's migration feature */ 303#define RCP_GC_BIT 0x0002000000000000UL
303#define KVM_UD_BIT 47 304
305/* User dirty / referenced bit for KVM's migration feature */
306#define KVM_UR_BIT 0x0000800000000000UL
307#define KVM_UC_BIT 0x0000400000000000UL
304 308
305#ifndef __s390x__ 309#ifndef __s390x__
306 310
@@ -407,17 +411,20 @@ extern unsigned long VMALLOC_START;
407#define __S110 PAGE_RW 411#define __S110 PAGE_RW
408#define __S111 PAGE_RW 412#define __S111 PAGE_RW
409 413
410/* 414static inline int mm_exclusive(struct mm_struct *mm)
411 * Certain architectures need to do special things when PTEs
412 * within a page table are directly modified. Thus, the following
413 * hook is made available.
414 */
415static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
416 pte_t *ptep, pte_t entry)
417{ 415{
418 *ptep = entry; 416 return likely(mm == current->active_mm &&
417 atomic_read(&mm->context.attach_count) <= 1);
419} 418}
420 419
420static inline int mm_has_pgste(struct mm_struct *mm)
421{
422#ifdef CONFIG_PGSTE
423 if (unlikely(mm->context.has_pgste))
424 return 1;
425#endif
426 return 0;
427}
421/* 428/*
422 * pgd/pmd/pte query functions 429 * pgd/pmd/pte query functions
423 */ 430 */
@@ -530,53 +537,130 @@ static inline int pte_special(pte_t pte)
530} 537}
531 538
532#define __HAVE_ARCH_PTE_SAME 539#define __HAVE_ARCH_PTE_SAME
533#define pte_same(a,b) (pte_val(a) == pte_val(b)) 540static inline int pte_same(pte_t a, pte_t b)
541{
542 return pte_val(a) == pte_val(b);
543}
534 544
535static inline void rcp_lock(pte_t *ptep) 545static inline pgste_t pgste_get_lock(pte_t *ptep)
536{ 546{
547 unsigned long new = 0;
537#ifdef CONFIG_PGSTE 548#ifdef CONFIG_PGSTE
538 unsigned long *pgste = (unsigned long *) (ptep + PTRS_PER_PTE); 549 unsigned long old;
550
539 preempt_disable(); 551 preempt_disable();
540 while (test_and_set_bit(RCP_PCL_BIT, pgste)) 552 asm(
541 ; 553 " lg %0,%2\n"
554 "0: lgr %1,%0\n"
555 " nihh %0,0xff7f\n" /* clear RCP_PCL_BIT in old */
556 " oihh %1,0x0080\n" /* set RCP_PCL_BIT in new */
557 " csg %0,%1,%2\n"
558 " jl 0b\n"
559 : "=&d" (old), "=&d" (new), "=Q" (ptep[PTRS_PER_PTE])
560 : "Q" (ptep[PTRS_PER_PTE]) : "cc");
542#endif 561#endif
562 return __pgste(new);
543} 563}
544 564
545static inline void rcp_unlock(pte_t *ptep) 565static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste)
546{ 566{
547#ifdef CONFIG_PGSTE 567#ifdef CONFIG_PGSTE
548 unsigned long *pgste = (unsigned long *) (ptep + PTRS_PER_PTE); 568 asm(
549 clear_bit(RCP_PCL_BIT, pgste); 569 " nihh %1,0xff7f\n" /* clear RCP_PCL_BIT */
570 " stg %1,%0\n"
571 : "=Q" (ptep[PTRS_PER_PTE])
572 : "d" (pgste_val(pgste)), "Q" (ptep[PTRS_PER_PTE]) : "cc");
550 preempt_enable(); 573 preempt_enable();
551#endif 574#endif
552} 575}
553 576
554#include <linux/page-flags.h> 577static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste)
555
556static inline void ptep_rcp_copy(pte_t *ptep)
557{ 578{
558#ifdef CONFIG_PGSTE 579#ifdef CONFIG_PGSTE
559 struct page *page = virt_to_page(pte_val(*ptep)); 580 unsigned long pfn, bits;
560 unsigned int skey; 581 unsigned char skey;
561 unsigned long *pgste = (unsigned long *) (ptep + PTRS_PER_PTE); 582
562 583 pfn = pte_val(*ptep) >> PAGE_SHIFT;
563 skey = page_get_storage_key(pte_val(*ptep) >> PAGE_SHIFT); 584 skey = page_get_storage_key(pfn);
564 if (skey & _PAGE_CHANGED) { 585 bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
565 set_bit_simple(RCP_GC_BIT, pgste); 586 /* Clear page changed & referenced bit in the storage key */
566 set_bit_simple(KVM_UD_BIT, pgste); 587 if (bits) {
567 } 588 skey ^= bits;
568 if (skey & _PAGE_REFERENCED) 589 page_set_storage_key(pfn, skey, 1);
569 set_bit_simple(RCP_GR_BIT, pgste);
570 if (test_and_clear_bit_simple(RCP_HC_BIT, pgste)) {
571 SetPageDirty(page);
572 set_bit_simple(KVM_UD_BIT, pgste);
573 } 590 }
574 if (test_and_clear_bit_simple(RCP_HR_BIT, pgste)) 591 /* Transfer page changed & referenced bit to guest bits in pgste */
575 SetPageReferenced(page); 592 pgste_val(pgste) |= bits << 48; /* RCP_GR_BIT & RCP_GC_BIT */
593 /* Get host changed & referenced bits from pgste */
594 bits |= (pgste_val(pgste) & (RCP_HR_BIT | RCP_HC_BIT)) >> 52;
595 /* Clear host bits in pgste. */
596 pgste_val(pgste) &= ~(RCP_HR_BIT | RCP_HC_BIT);
597 pgste_val(pgste) &= ~(RCP_ACC_BITS | RCP_FP_BIT);
598 /* Copy page access key and fetch protection bit to pgste */
599 pgste_val(pgste) |=
600 (unsigned long) (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
601 /* Transfer changed and referenced to kvm user bits */
602 pgste_val(pgste) |= bits << 45; /* KVM_UR_BIT & KVM_UC_BIT */
603 /* Transfer changed & referenced to pte sofware bits */
604 pte_val(*ptep) |= bits << 1; /* _PAGE_SWR & _PAGE_SWC */
605#endif
606 return pgste;
607
608}
609
610static inline pgste_t pgste_update_young(pte_t *ptep, pgste_t pgste)
611{
612#ifdef CONFIG_PGSTE
613 int young;
614
615 young = page_reset_referenced(pte_val(*ptep) & PAGE_MASK);
616 /* Transfer page referenced bit to pte software bit (host view) */
617 if (young || (pgste_val(pgste) & RCP_HR_BIT))
618 pte_val(*ptep) |= _PAGE_SWR;
619 /* Clear host referenced bit in pgste. */
620 pgste_val(pgste) &= ~RCP_HR_BIT;
621 /* Transfer page referenced bit to guest bit in pgste */
622 pgste_val(pgste) |= (unsigned long) young << 50; /* set RCP_GR_BIT */
623#endif
624 return pgste;
625
626}
627
628static inline void pgste_set_pte(pte_t *ptep, pgste_t pgste)
629{
630#ifdef CONFIG_PGSTE
631 unsigned long pfn;
632 unsigned long okey, nkey;
633
634 pfn = pte_val(*ptep) >> PAGE_SHIFT;
635 okey = nkey = page_get_storage_key(pfn);
636 nkey &= ~(_PAGE_ACC_BITS | _PAGE_FP_BIT);
637 /* Set page access key and fetch protection bit from pgste */
638 nkey |= (pgste_val(pgste) & (RCP_ACC_BITS | RCP_FP_BIT)) >> 56;
639 if (okey != nkey)
640 page_set_storage_key(pfn, nkey, 1);
576#endif 641#endif
577} 642}
578 643
579/* 644/*
645 * Certain architectures need to do special things when PTEs
646 * within a page table are directly modified. Thus, the following
647 * hook is made available.
648 */
649static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
650 pte_t *ptep, pte_t entry)
651{
652 pgste_t pgste;
653
654 if (mm_has_pgste(mm)) {
655 pgste = pgste_get_lock(ptep);
656 pgste_set_pte(ptep, pgste);
657 *ptep = entry;
658 pgste_set_unlock(ptep, pgste);
659 } else
660 *ptep = entry;
661}
662
663/*
580 * query functions pte_write/pte_dirty/pte_young only work if 664 * query functions pte_write/pte_dirty/pte_young only work if
581 * pte_present() is true. Undefined behaviour if not.. 665 * pte_present() is true. Undefined behaviour if not..
582 */ 666 */
@@ -587,19 +671,19 @@ static inline int pte_write(pte_t pte)
587 671
588static inline int pte_dirty(pte_t pte) 672static inline int pte_dirty(pte_t pte)
589{ 673{
590 /* A pte is neither clean nor dirty on s/390. The dirty bit 674#ifdef CONFIG_PGSTE
591 * is in the storage key. See page_test_and_clear_dirty for 675 if (pte_val(pte) & _PAGE_SWC)
592 * details. 676 return 1;
593 */ 677#endif
594 return 0; 678 return 0;
595} 679}
596 680
597static inline int pte_young(pte_t pte) 681static inline int pte_young(pte_t pte)
598{ 682{
599 /* A pte is neither young nor old on s/390. The young bit 683#ifdef CONFIG_PGSTE
600 * is in the storage key. See page_test_and_clear_young for 684 if (pte_val(pte) & _PAGE_SWR)
601 * details. 685 return 1;
602 */ 686#endif
603 return 0; 687 return 0;
604} 688}
605 689
@@ -607,46 +691,27 @@ static inline int pte_young(pte_t pte)
607 * pgd/pmd/pte modification functions 691 * pgd/pmd/pte modification functions
608 */ 692 */
609 693
610#ifndef __s390x__ 694static inline void pgd_clear(pgd_t *pgd)
611
612#define pgd_clear(pgd) do { } while (0)
613#define pud_clear(pud) do { } while (0)
614
615#else /* __s390x__ */
616
617static inline void pgd_clear_kernel(pgd_t * pgd)
618{ 695{
696#ifdef __s390x__
619 if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2) 697 if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2)
620 pgd_val(*pgd) = _REGION2_ENTRY_EMPTY; 698 pgd_val(*pgd) = _REGION2_ENTRY_EMPTY;
699#endif
621} 700}
622 701
623static inline void pgd_clear(pgd_t * pgd) 702static inline void pud_clear(pud_t *pud)
624{
625 pgd_clear_kernel(pgd);
626}
627
628static inline void pud_clear_kernel(pud_t *pud)
629{ 703{
704#ifdef __s390x__
630 if ((pud_val(*pud) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) 705 if ((pud_val(*pud) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3)
631 pud_val(*pud) = _REGION3_ENTRY_EMPTY; 706 pud_val(*pud) = _REGION3_ENTRY_EMPTY;
707#endif
632} 708}
633 709
634static inline void pud_clear(pud_t *pud) 710static inline void pmd_clear(pmd_t *pmdp)
635{
636 pud_clear_kernel(pud);
637}
638#endif /* __s390x__ */
639
640static inline void pmd_clear_kernel(pmd_t * pmdp)
641{ 711{
642 pmd_val(*pmdp) = _SEGMENT_ENTRY_EMPTY; 712 pmd_val(*pmdp) = _SEGMENT_ENTRY_EMPTY;
643} 713}
644 714
645static inline void pmd_clear(pmd_t *pmd)
646{
647 pmd_clear_kernel(pmd);
648}
649
650static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) 715static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
651{ 716{
652 pte_val(*ptep) = _PAGE_TYPE_EMPTY; 717 pte_val(*ptep) = _PAGE_TYPE_EMPTY;
@@ -679,35 +744,27 @@ static inline pte_t pte_mkwrite(pte_t pte)
679 744
680static inline pte_t pte_mkclean(pte_t pte) 745static inline pte_t pte_mkclean(pte_t pte)
681{ 746{
682 /* The only user of pte_mkclean is the fork() code. 747#ifdef CONFIG_PGSTE
683 We must *not* clear the *physical* page dirty bit 748 pte_val(pte) &= ~_PAGE_SWC;
684 just because fork() wants to clear the dirty bit in 749#endif
685 *one* of the page's mappings. So we just do nothing. */
686 return pte; 750 return pte;
687} 751}
688 752
689static inline pte_t pte_mkdirty(pte_t pte) 753static inline pte_t pte_mkdirty(pte_t pte)
690{ 754{
691 /* We do not explicitly set the dirty bit because the
692 * sske instruction is slow. It is faster to let the
693 * next instruction set the dirty bit.
694 */
695 return pte; 755 return pte;
696} 756}
697 757
698static inline pte_t pte_mkold(pte_t pte) 758static inline pte_t pte_mkold(pte_t pte)
699{ 759{
700 /* S/390 doesn't keep its dirty/referenced bit in the pte. 760#ifdef CONFIG_PGSTE
701 * There is no point in clearing the real referenced bit. 761 pte_val(pte) &= ~_PAGE_SWR;
702 */ 762#endif
703 return pte; 763 return pte;
704} 764}
705 765
706static inline pte_t pte_mkyoung(pte_t pte) 766static inline pte_t pte_mkyoung(pte_t pte)
707{ 767{
708 /* S/390 doesn't keep its dirty/referenced bit in the pte.
709 * There is no point in setting the real referenced bit.
710 */
711 return pte; 768 return pte;
712} 769}
713 770
@@ -745,64 +802,60 @@ static inline pte_t pte_mkhuge(pte_t pte)
745} 802}
746#endif 803#endif
747 804
748#ifdef CONFIG_PGSTE
749/* 805/*
750 * Get (and clear) the user dirty bit for a PTE. 806 * Get (and clear) the user dirty bit for a pte.
751 */ 807 */
752static inline int kvm_s390_test_and_clear_page_dirty(struct mm_struct *mm, 808static inline int ptep_test_and_clear_user_dirty(struct mm_struct *mm,
753 pte_t *ptep) 809 pte_t *ptep)
754{ 810{
755 int dirty; 811 pgste_t pgste;
756 unsigned long *pgste; 812 int dirty = 0;
757 unsigned long pfn; 813
758 struct page *page; 814 if (mm_has_pgste(mm)) {
759 unsigned int skey; 815 pgste = pgste_get_lock(ptep);
760 816 pgste = pgste_update_all(ptep, pgste);
761 if (!mm->context.has_pgste) 817 dirty = !!(pgste_val(pgste) & KVM_UC_BIT);
762 return -EINVAL; 818 pgste_val(pgste) &= ~KVM_UC_BIT;
763 rcp_lock(ptep); 819 pgste_set_unlock(ptep, pgste);
764 pgste = (unsigned long *) (ptep + PTRS_PER_PTE); 820 return dirty;
765 pfn = pte_val(*ptep) >> PAGE_SHIFT;
766 page = pfn_to_page(pfn);
767 skey = page_get_storage_key(pfn);
768 if (skey & _PAGE_CHANGED) {
769 set_bit_simple(RCP_GC_BIT, pgste);
770 set_bit_simple(KVM_UD_BIT, pgste);
771 }
772 if (test_and_clear_bit_simple(RCP_HC_BIT, pgste)) {
773 SetPageDirty(page);
774 set_bit_simple(KVM_UD_BIT, pgste);
775 } 821 }
776 dirty = test_and_clear_bit_simple(KVM_UD_BIT, pgste);
777 if (skey & _PAGE_CHANGED)
778 page_set_storage_key(pfn, skey & ~_PAGE_CHANGED, 1);
779 rcp_unlock(ptep);
780 return dirty; 822 return dirty;
781} 823}
782#endif 824
825/*
826 * Get (and clear) the user referenced bit for a pte.
827 */
828static inline int ptep_test_and_clear_user_young(struct mm_struct *mm,
829 pte_t *ptep)
830{
831 pgste_t pgste;
832 int young = 0;
833
834 if (mm_has_pgste(mm)) {
835 pgste = pgste_get_lock(ptep);
836 pgste = pgste_update_young(ptep, pgste);
837 young = !!(pgste_val(pgste) & KVM_UR_BIT);
838 pgste_val(pgste) &= ~KVM_UR_BIT;
839 pgste_set_unlock(ptep, pgste);
840 }
841 return young;
842}
783 843
784#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG 844#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
785static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, 845static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
786 unsigned long addr, pte_t *ptep) 846 unsigned long addr, pte_t *ptep)
787{ 847{
788#ifdef CONFIG_PGSTE 848 pgste_t pgste;
789 unsigned long pfn; 849 pte_t pte;
790 int young;
791 unsigned long *pgste;
792 850
793 if (!vma->vm_mm->context.has_pgste) 851 if (mm_has_pgste(vma->vm_mm)) {
794 return 0; 852 pgste = pgste_get_lock(ptep);
795 pfn = pte_val(*ptep) >> PAGE_SHIFT; 853 pgste = pgste_update_young(ptep, pgste);
796 pgste = (unsigned long *) (ptep + PTRS_PER_PTE); 854 pte = *ptep;
797 855 *ptep = pte_mkold(pte);
798 young = ((page_get_storage_key(pfn) & _PAGE_REFERENCED) != 0); 856 pgste_set_unlock(ptep, pgste);
799 rcp_lock(ptep); 857 return pte_young(pte);
800 if (young) 858 }
801 set_bit_simple(RCP_GR_BIT, pgste);
802 young |= test_and_clear_bit_simple(RCP_HR_BIT, pgste);
803 rcp_unlock(ptep);
804 return young;
805#endif
806 return 0; 859 return 0;
807} 860}
808 861
@@ -814,10 +867,7 @@ static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
814 * On s390 reference bits are in storage key and never in TLB 867 * On s390 reference bits are in storage key and never in TLB
815 * With virtualization we handle the reference bit, without we 868 * With virtualization we handle the reference bit, without we
816 * we can simply return */ 869 * we can simply return */
817#ifdef CONFIG_PGSTE
818 return ptep_test_and_clear_young(vma, address, ptep); 870 return ptep_test_and_clear_young(vma, address, ptep);
819#endif
820 return 0;
821} 871}
822 872
823static inline void __ptep_ipte(unsigned long address, pte_t *ptep) 873static inline void __ptep_ipte(unsigned long address, pte_t *ptep)
@@ -837,21 +887,6 @@ static inline void __ptep_ipte(unsigned long address, pte_t *ptep)
837 } 887 }
838} 888}
839 889
840static inline void ptep_invalidate(struct mm_struct *mm,
841 unsigned long address, pte_t *ptep)
842{
843 if (mm->context.has_pgste) {
844 rcp_lock(ptep);
845 __ptep_ipte(address, ptep);
846 ptep_rcp_copy(ptep);
847 pte_val(*ptep) = _PAGE_TYPE_EMPTY;
848 rcp_unlock(ptep);
849 return;
850 }
851 __ptep_ipte(address, ptep);
852 pte_val(*ptep) = _PAGE_TYPE_EMPTY;
853}
854
855/* 890/*
856 * This is hard to understand. ptep_get_and_clear and ptep_clear_flush 891 * This is hard to understand. ptep_get_and_clear and ptep_clear_flush
857 * both clear the TLB for the unmapped pte. The reason is that 892 * both clear the TLB for the unmapped pte. The reason is that
@@ -866,24 +901,72 @@ static inline void ptep_invalidate(struct mm_struct *mm,
866 * is a nop. 901 * is a nop.
867 */ 902 */
868#define __HAVE_ARCH_PTEP_GET_AND_CLEAR 903#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
869#define ptep_get_and_clear(__mm, __address, __ptep) \ 904static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
870({ \ 905 unsigned long address, pte_t *ptep)
871 pte_t __pte = *(__ptep); \ 906{
872 (__mm)->context.flush_mm = 1; \ 907 pgste_t pgste;
873 if (atomic_read(&(__mm)->context.attach_count) > 1 || \ 908 pte_t pte;
874 (__mm) != current->active_mm) \ 909
875 ptep_invalidate(__mm, __address, __ptep); \ 910 mm->context.flush_mm = 1;
876 else \ 911 if (mm_has_pgste(mm))
877 pte_clear((__mm), (__address), (__ptep)); \ 912 pgste = pgste_get_lock(ptep);
878 __pte; \ 913
879}) 914 pte = *ptep;
915 if (!mm_exclusive(mm))
916 __ptep_ipte(address, ptep);
917 pte_val(*ptep) = _PAGE_TYPE_EMPTY;
918
919 if (mm_has_pgste(mm)) {
920 pgste = pgste_update_all(&pte, pgste);
921 pgste_set_unlock(ptep, pgste);
922 }
923 return pte;
924}
925
926#define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
927static inline pte_t ptep_modify_prot_start(struct mm_struct *mm,
928 unsigned long address,
929 pte_t *ptep)
930{
931 pte_t pte;
932
933 mm->context.flush_mm = 1;
934 if (mm_has_pgste(mm))
935 pgste_get_lock(ptep);
936
937 pte = *ptep;
938 if (!mm_exclusive(mm))
939 __ptep_ipte(address, ptep);
940 return pte;
941}
942
943static inline void ptep_modify_prot_commit(struct mm_struct *mm,
944 unsigned long address,
945 pte_t *ptep, pte_t pte)
946{
947 *ptep = pte;
948 if (mm_has_pgste(mm))
949 pgste_set_unlock(ptep, *(pgste_t *)(ptep + PTRS_PER_PTE));
950}
880 951
881#define __HAVE_ARCH_PTEP_CLEAR_FLUSH 952#define __HAVE_ARCH_PTEP_CLEAR_FLUSH
882static inline pte_t ptep_clear_flush(struct vm_area_struct *vma, 953static inline pte_t ptep_clear_flush(struct vm_area_struct *vma,
883 unsigned long address, pte_t *ptep) 954 unsigned long address, pte_t *ptep)
884{ 955{
885 pte_t pte = *ptep; 956 pgste_t pgste;
886 ptep_invalidate(vma->vm_mm, address, ptep); 957 pte_t pte;
958
959 if (mm_has_pgste(vma->vm_mm))
960 pgste = pgste_get_lock(ptep);
961
962 pte = *ptep;
963 __ptep_ipte(address, ptep);
964 pte_val(*ptep) = _PAGE_TYPE_EMPTY;
965
966 if (mm_has_pgste(vma->vm_mm)) {
967 pgste = pgste_update_all(&pte, pgste);
968 pgste_set_unlock(ptep, pgste);
969 }
887 return pte; 970 return pte;
888} 971}
889 972
@@ -896,41 +979,68 @@ static inline pte_t ptep_clear_flush(struct vm_area_struct *vma,
896 */ 979 */
897#define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL 980#define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
898static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, 981static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
899 unsigned long addr, 982 unsigned long address,
900 pte_t *ptep, int full) 983 pte_t *ptep, int full)
901{ 984{
902 pte_t pte = *ptep; 985 pgste_t pgste;
986 pte_t pte;
903 987
904 if (full) 988 if (mm_has_pgste(mm))
905 pte_clear(mm, addr, ptep); 989 pgste = pgste_get_lock(ptep);
906 else 990
907 ptep_invalidate(mm, addr, ptep); 991 pte = *ptep;
992 if (!full)
993 __ptep_ipte(address, ptep);
994 pte_val(*ptep) = _PAGE_TYPE_EMPTY;
995
996 if (mm_has_pgste(mm)) {
997 pgste = pgste_update_all(&pte, pgste);
998 pgste_set_unlock(ptep, pgste);
999 }
908 return pte; 1000 return pte;
909} 1001}
910 1002
911#define __HAVE_ARCH_PTEP_SET_WRPROTECT 1003#define __HAVE_ARCH_PTEP_SET_WRPROTECT
912#define ptep_set_wrprotect(__mm, __addr, __ptep) \ 1004static inline pte_t ptep_set_wrprotect(struct mm_struct *mm,
913({ \ 1005 unsigned long address, pte_t *ptep)
914 pte_t __pte = *(__ptep); \ 1006{
915 if (pte_write(__pte)) { \ 1007 pgste_t pgste;
916 (__mm)->context.flush_mm = 1; \ 1008 pte_t pte = *ptep;
917 if (atomic_read(&(__mm)->context.attach_count) > 1 || \ 1009
918 (__mm) != current->active_mm) \ 1010 if (pte_write(pte)) {
919 ptep_invalidate(__mm, __addr, __ptep); \ 1011 mm->context.flush_mm = 1;
920 set_pte_at(__mm, __addr, __ptep, pte_wrprotect(__pte)); \ 1012 if (mm_has_pgste(mm))
921 } \ 1013 pgste = pgste_get_lock(ptep);
922}) 1014
1015 if (!mm_exclusive(mm))
1016 __ptep_ipte(address, ptep);
1017 *ptep = pte_wrprotect(pte);
1018
1019 if (mm_has_pgste(mm))
1020 pgste_set_unlock(ptep, pgste);
1021 }
1022 return pte;
1023}
923 1024
924#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS 1025#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
925#define ptep_set_access_flags(__vma, __addr, __ptep, __entry, __dirty) \ 1026static inline int ptep_set_access_flags(struct vm_area_struct *vma,
926({ \ 1027 unsigned long address, pte_t *ptep,
927 int __changed = !pte_same(*(__ptep), __entry); \ 1028 pte_t entry, int dirty)
928 if (__changed) { \ 1029{
929 ptep_invalidate((__vma)->vm_mm, __addr, __ptep); \ 1030 pgste_t pgste;
930 set_pte_at((__vma)->vm_mm, __addr, __ptep, __entry); \ 1031
931 } \ 1032 if (pte_same(*ptep, entry))
932 __changed; \ 1033 return 0;
933}) 1034 if (mm_has_pgste(vma->vm_mm))
1035 pgste = pgste_get_lock(ptep);
1036
1037 __ptep_ipte(address, ptep);
1038 *ptep = entry;
1039
1040 if (mm_has_pgste(vma->vm_mm))
1041 pgste_set_unlock(ptep, pgste);
1042 return 1;
1043}
934 1044
935/* 1045/*
936 * Conversion functions: convert a page and protection to a page entry, 1046 * Conversion functions: convert a page and protection to a page entry,
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index bb409332a484..dfefc2171691 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -175,7 +175,8 @@ void kernel_map_pages(struct page *page, int numpages, int enable)
175 pmd = pmd_offset(pud, address); 175 pmd = pmd_offset(pud, address);
176 pte = pte_offset_kernel(pmd, address); 176 pte = pte_offset_kernel(pmd, address);
177 if (!enable) { 177 if (!enable) {
178 ptep_invalidate(&init_mm, address, pte); 178 __ptep_ipte(address, pte);
179 pte_val(*pte) = _PAGE_TYPE_EMPTY;
179 continue; 180 continue;
180 } 181 }
181 *pte = mk_pte_phys(address, __pgprot(_PAGE_TYPE_RW)); 182 *pte = mk_pte_phys(address, __pgprot(_PAGE_TYPE_RW));
diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c
index f05edcc3beff..d013ed39743b 100644
--- a/arch/s390/mm/pageattr.c
+++ b/arch/s390/mm/pageattr.c
@@ -28,7 +28,7 @@ static void change_page_attr(unsigned long addr, int numpages,
28 28
29 pte = *ptep; 29 pte = *ptep;
30 pte = set(pte); 30 pte = set(pte);
31 ptep_invalidate(&init_mm, addr, ptep); 31 __ptep_ipte(addr, ptep);
32 *ptep = pte; 32 *ptep = pte;
33 addr += PAGE_SIZE; 33 addr += PAGE_SIZE;
34 } 34 }
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index 34c43f23b28c..8c1970d1dd91 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -95,7 +95,7 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro)
95 pu_dir = vmem_pud_alloc(); 95 pu_dir = vmem_pud_alloc();
96 if (!pu_dir) 96 if (!pu_dir)
97 goto out; 97 goto out;
98 pgd_populate_kernel(&init_mm, pg_dir, pu_dir); 98 pgd_populate(&init_mm, pg_dir, pu_dir);
99 } 99 }
100 100
101 pu_dir = pud_offset(pg_dir, address); 101 pu_dir = pud_offset(pg_dir, address);
@@ -103,7 +103,7 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro)
103 pm_dir = vmem_pmd_alloc(); 103 pm_dir = vmem_pmd_alloc();
104 if (!pm_dir) 104 if (!pm_dir)
105 goto out; 105 goto out;
106 pud_populate_kernel(&init_mm, pu_dir, pm_dir); 106 pud_populate(&init_mm, pu_dir, pm_dir);
107 } 107 }
108 108
109 pte = mk_pte_phys(address, __pgprot(ro ? _PAGE_RO : 0)); 109 pte = mk_pte_phys(address, __pgprot(ro ? _PAGE_RO : 0));
@@ -123,7 +123,7 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro)
123 pt_dir = vmem_pte_alloc(); 123 pt_dir = vmem_pte_alloc();
124 if (!pt_dir) 124 if (!pt_dir)
125 goto out; 125 goto out;
126 pmd_populate_kernel(&init_mm, pm_dir, pt_dir); 126 pmd_populate(&init_mm, pm_dir, pt_dir);
127 } 127 }
128 128
129 pt_dir = pte_offset_kernel(pm_dir, address); 129 pt_dir = pte_offset_kernel(pm_dir, address);
@@ -159,7 +159,7 @@ static void vmem_remove_range(unsigned long start, unsigned long size)
159 continue; 159 continue;
160 160
161 if (pmd_huge(*pm_dir)) { 161 if (pmd_huge(*pm_dir)) {
162 pmd_clear_kernel(pm_dir); 162 pmd_clear(pm_dir);
163 address += HPAGE_SIZE - PAGE_SIZE; 163 address += HPAGE_SIZE - PAGE_SIZE;
164 continue; 164 continue;
165 } 165 }
@@ -192,7 +192,7 @@ int __meminit vmemmap_populate(struct page *start, unsigned long nr, int node)
192 pu_dir = vmem_pud_alloc(); 192 pu_dir = vmem_pud_alloc();
193 if (!pu_dir) 193 if (!pu_dir)
194 goto out; 194 goto out;
195 pgd_populate_kernel(&init_mm, pg_dir, pu_dir); 195 pgd_populate(&init_mm, pg_dir, pu_dir);
196 } 196 }
197 197
198 pu_dir = pud_offset(pg_dir, address); 198 pu_dir = pud_offset(pg_dir, address);
@@ -200,7 +200,7 @@ int __meminit vmemmap_populate(struct page *start, unsigned long nr, int node)
200 pm_dir = vmem_pmd_alloc(); 200 pm_dir = vmem_pmd_alloc();
201 if (!pm_dir) 201 if (!pm_dir)
202 goto out; 202 goto out;
203 pud_populate_kernel(&init_mm, pu_dir, pm_dir); 203 pud_populate(&init_mm, pu_dir, pm_dir);
204 } 204 }
205 205
206 pm_dir = pmd_offset(pu_dir, address); 206 pm_dir = pmd_offset(pu_dir, address);
@@ -208,7 +208,7 @@ int __meminit vmemmap_populate(struct page *start, unsigned long nr, int node)
208 pt_dir = vmem_pte_alloc(); 208 pt_dir = vmem_pte_alloc();
209 if (!pt_dir) 209 if (!pt_dir)
210 goto out; 210 goto out;
211 pmd_populate_kernel(&init_mm, pm_dir, pt_dir); 211 pmd_populate(&init_mm, pm_dir, pt_dir);
212 } 212 }
213 213
214 pt_dir = pte_offset_kernel(pm_dir, address); 214 pt_dir = pte_offset_kernel(pm_dir, address);