aboutsummaryrefslogtreecommitdiffstats
path: root/arch/s390/include/asm/pgtable.h
diff options
context:
space:
mode:
Diffstat (limited to 'arch/s390/include/asm/pgtable.h')
-rw-r--r--arch/s390/include/asm/pgtable.h167
1 files changed, 72 insertions, 95 deletions
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 989cfae9e202..fc642399b489 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -12,12 +12,9 @@
12#define _ASM_S390_PGTABLE_H 12#define _ASM_S390_PGTABLE_H
13 13
14/* 14/*
15 * The Linux memory management assumes a three-level page table setup. For 15 * The Linux memory management assumes a three-level page table setup.
16 * s390 31 bit we "fold" the mid level into the top-level page table, so 16 * For s390 64 bit we use up to four of the five levels the hardware
17 * that we physically have the same two-level page table as the s390 mmu 17 * provides (region first tables are not used).
18 * expects in 31 bit mode. For s390 64 bit we use three of the five levels
19 * the hardware provides (region first and region second tables are not
20 * used).
21 * 18 *
22 * The "pgd_xxx()" functions are trivial for a folded two-level 19 * The "pgd_xxx()" functions are trivial for a folded two-level
23 * setup: the pgd is never bad, and a pmd always exists (as it's folded 20 * setup: the pgd is never bad, and a pmd always exists (as it's folded
@@ -101,8 +98,8 @@ extern unsigned long zero_page_mask;
101 98
102#ifndef __ASSEMBLY__ 99#ifndef __ASSEMBLY__
103/* 100/*
104 * The vmalloc and module area will always be on the topmost area of the kernel 101 * The vmalloc and module area will always be on the topmost area of the
105 * mapping. We reserve 96MB (31bit) / 128GB (64bit) for vmalloc and modules. 102 * kernel mapping. We reserve 128GB (64bit) for vmalloc and modules.
106 * On 64 bit kernels we have a 2GB area at the top of the vmalloc area where 103 * On 64 bit kernels we have a 2GB area at the top of the vmalloc area where
107 * modules will reside. That makes sure that inter module branches always 104 * modules will reside. That makes sure that inter module branches always
108 * happen without trampolines and in addition the placement within a 2GB frame 105 * happen without trampolines and in addition the placement within a 2GB frame
@@ -131,38 +128,6 @@ static inline int is_module_addr(void *addr)
131} 128}
132 129
133/* 130/*
134 * A 31 bit pagetable entry of S390 has following format:
135 * | PFRA | | OS |
136 * 0 0IP0
137 * 00000000001111111111222222222233
138 * 01234567890123456789012345678901
139 *
140 * I Page-Invalid Bit: Page is not available for address-translation
141 * P Page-Protection Bit: Store access not possible for page
142 *
143 * A 31 bit segmenttable entry of S390 has following format:
144 * | P-table origin | |PTL
145 * 0 IC
146 * 00000000001111111111222222222233
147 * 01234567890123456789012345678901
148 *
149 * I Segment-Invalid Bit: Segment is not available for address-translation
150 * C Common-Segment Bit: Segment is not private (PoP 3-30)
151 * PTL Page-Table-Length: Page-table length (PTL+1*16 entries -> up to 256)
152 *
153 * The 31 bit segmenttable origin of S390 has following format:
154 *
155 * |S-table origin | | STL |
156 * X **GPS
157 * 00000000001111111111222222222233
158 * 01234567890123456789012345678901
159 *
160 * X Space-Switch event:
161 * G Segment-Invalid Bit: *
162 * P Private-Space Bit: Segment is not private (PoP 3-30)
163 * S Storage-Alteration:
164 * STL Segment-Table-Length: Segment-table length (STL+1*16 entries -> up to 2048)
165 *
166 * A 64 bit pagetable entry of S390 has following format: 131 * A 64 bit pagetable entry of S390 has following format:
167 * | PFRA |0IPC| OS | 132 * | PFRA |0IPC| OS |
168 * 0000000000111111111122222222223333333333444444444455555555556666 133 * 0000000000111111111122222222223333333333444444444455555555556666
@@ -220,7 +185,6 @@ static inline int is_module_addr(void *addr)
220 185
221/* Software bits in the page table entry */ 186/* Software bits in the page table entry */
222#define _PAGE_PRESENT 0x001 /* SW pte present bit */ 187#define _PAGE_PRESENT 0x001 /* SW pte present bit */
223#define _PAGE_TYPE 0x002 /* SW pte type bit */
224#define _PAGE_YOUNG 0x004 /* SW pte young bit */ 188#define _PAGE_YOUNG 0x004 /* SW pte young bit */
225#define _PAGE_DIRTY 0x008 /* SW pte dirty bit */ 189#define _PAGE_DIRTY 0x008 /* SW pte dirty bit */
226#define _PAGE_READ 0x010 /* SW pte read bit */ 190#define _PAGE_READ 0x010 /* SW pte read bit */
@@ -240,31 +204,34 @@ static inline int is_module_addr(void *addr)
240 * table lock held. 204 * table lock held.
241 * 205 *
242 * The following table gives the different possible bit combinations for 206 * The following table gives the different possible bit combinations for
243 * the pte hardware and software bits in the last 12 bits of a pte: 207 * the pte hardware and software bits in the last 12 bits of a pte
208 * (. unassigned bit, x don't care, t swap type):
244 * 209 *
245 * 842100000000 210 * 842100000000
246 * 000084210000 211 * 000084210000
247 * 000000008421 212 * 000000008421
248 * .IR...wrdytp 213 * .IR.uswrdy.p
249 * empty .10...000000 214 * empty .10.00000000
250 * swap .10...xxxx10 215 * swap .11..ttttt.0
251 * file .11...xxxxx0 216 * prot-none, clean, old .11.xx0000.1
252 * prot-none, clean, old .11...000001 217 * prot-none, clean, young .11.xx0001.1
253 * prot-none, clean, young .11...000101 218 * prot-none, dirty, old .10.xx0010.1
254 * prot-none, dirty, old .10...001001 219 * prot-none, dirty, young .10.xx0011.1
255 * prot-none, dirty, young .10...001101 220 * read-only, clean, old .11.xx0100.1
256 * read-only, clean, old .11...010001 221 * read-only, clean, young .01.xx0101.1
257 * read-only, clean, young .01...010101 222 * read-only, dirty, old .11.xx0110.1
258 * read-only, dirty, old .11...011001 223 * read-only, dirty, young .01.xx0111.1
259 * read-only, dirty, young .01...011101 224 * read-write, clean, old .11.xx1100.1
260 * read-write, clean, old .11...110001 225 * read-write, clean, young .01.xx1101.1
261 * read-write, clean, young .01...110101 226 * read-write, dirty, old .10.xx1110.1
262 * read-write, dirty, old .10...111001 227 * read-write, dirty, young .00.xx1111.1
263 * read-write, dirty, young .00...111101 228 * HW-bits: R read-only, I invalid
229 * SW-bits: p present, y young, d dirty, r read, w write, s special,
230 * u unused, l large
264 * 231 *
265 * pte_present is true for the bit pattern .xx...xxxxx1, (pte & 0x001) == 0x001 232 * pte_none is true for the bit pattern .10.00000000, pte == 0x400
 266 * pte_none is true for the bit pattern .10...xxxx00, (pte & 0x603) == 0x400 233 * pte_swap is true for the bit pattern .11..ttttt.0, (pte & 0x201) == 0x200
267 * pte_swap is true for the bit pattern .10...xxxx10, (pte & 0x603) == 0x402 234 * pte_present is true for the bit pattern .xx.xxxxxx.1, (pte & 0x001) == 0x001
268 */ 235 */
269 236
270/* Bits in the segment/region table address-space-control-element */ 237/* Bits in the segment/region table address-space-control-element */
@@ -335,6 +302,8 @@ static inline int is_module_addr(void *addr)
335 * read-write, dirty, young 11..0...0...11 302 * read-write, dirty, young 11..0...0...11
336 * The segment table origin is used to distinguish empty (origin==0) from 303 * The segment table origin is used to distinguish empty (origin==0) from
337 * read-write, old segment table entries (origin!=0) 304 * read-write, old segment table entries (origin!=0)
305 * HW-bits: R read-only, I invalid
306 * SW-bits: y young, d dirty, r read, w write
338 */ 307 */
339 308
340#define _SEGMENT_ENTRY_SPLIT_BIT 11 /* THP splitting bit number */ 309#define _SEGMENT_ENTRY_SPLIT_BIT 11 /* THP splitting bit number */
@@ -423,6 +392,15 @@ static inline int mm_has_pgste(struct mm_struct *mm)
423 return 0; 392 return 0;
424} 393}
425 394
395static inline int mm_alloc_pgste(struct mm_struct *mm)
396{
397#ifdef CONFIG_PGSTE
398 if (unlikely(mm->context.alloc_pgste))
399 return 1;
400#endif
401 return 0;
402}
403
426/* 404/*
427 * In the case that a guest uses storage keys 405 * In the case that a guest uses storage keys
428 * faults should no longer be backed by zero pages 406 * faults should no longer be backed by zero pages
@@ -582,10 +560,9 @@ static inline int pte_none(pte_t pte)
582 560
583static inline int pte_swap(pte_t pte) 561static inline int pte_swap(pte_t pte)
584{ 562{
585 /* Bit pattern: (pte & 0x603) == 0x402 */ 563 /* Bit pattern: (pte & 0x201) == 0x200 */
586 return (pte_val(pte) & (_PAGE_INVALID | _PAGE_PROTECT | 564 return (pte_val(pte) & (_PAGE_PROTECT | _PAGE_PRESENT))
587 _PAGE_TYPE | _PAGE_PRESENT)) 565 == _PAGE_PROTECT;
588 == (_PAGE_INVALID | _PAGE_TYPE);
589} 566}
590 567
591static inline int pte_special(pte_t pte) 568static inline int pte_special(pte_t pte)
@@ -1586,51 +1563,51 @@ static inline int has_transparent_hugepage(void)
1586#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ 1563#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
1587 1564
1588/* 1565/*
1589 * 31 bit swap entry format:
1590 * A page-table entry has some bits we have to treat in a special way.
 1591 * Bits 0, 20 and 23 have to be zero, otherwise a specification
 1592 * exception will occur instead of a page translation exception. The
 1593 * specification exception has the bad habit not to store necessary
1594 * information in the lowcore.
1595 * Bits 21, 22, 30 and 31 are used to indicate the page type.
1596 * A swap pte is indicated by bit pattern (pte & 0x603) == 0x402
1597 * This leaves the bits 1-19 and bits 24-29 to store type and offset.
1598 * We use the 5 bits from 25-29 for the type and the 20 bits from 1-19
1599 * plus 24 for the offset.
1600 * 0| offset |0110|o|type |00|
1601 * 0 0000000001111111111 2222 2 22222 33
1602 * 0 1234567890123456789 0123 4 56789 01
1603 *
1604 * 64 bit swap entry format: 1566 * 64 bit swap entry format:
1605 * A page-table entry has some bits we have to treat in a special way. 1567 * A page-table entry has some bits we have to treat in a special way.
 1606 * Bits 52 and 55 have to be zero, otherwise a specification 1568 * Bits 52 and 55 have to be zero, otherwise a specification
 1607 * exception will occur instead of a page translation exception. The 1569 * exception will occur instead of a page translation exception. The
 1608 * specification exception has the bad habit not to store necessary 1570 * specification exception has the bad habit not to store necessary
1609 * information in the lowcore. 1571 * information in the lowcore.
1610 * Bits 53, 54, 62 and 63 are used to indicate the page type. 1572 * Bits 54 and 63 are used to indicate the page type.
1611 * A swap pte is indicated by bit pattern (pte & 0x603) == 0x402 1573 * A swap pte is indicated by bit pattern (pte & 0x201) == 0x200
1612 * This leaves the bits 0-51 and bits 56-61 to store type and offset. 1574 * This leaves the bits 0-51 and bits 56-62 to store type and offset.
1613 * We use the 5 bits from 57-61 for the type and the 53 bits from 0-51 1575 * We use the 5 bits from 57-61 for the type and the 52 bits from 0-51
1614 * plus 56 for the offset. 1576 * for the offset.
1615 * | offset |0110|o|type |00| 1577 * | offset |01100|type |00|
1616 * 0000000000111111111122222222223333333333444444444455 5555 5 55566 66 1578 * |0000000000111111111122222222223333333333444444444455|55555|55566|66|
1617 * 0123456789012345678901234567890123456789012345678901 2345 6 78901 23 1579 * |0123456789012345678901234567890123456789012345678901|23456|78901|23|
1618 */ 1580 */
1619 1581
1620#define __SWP_OFFSET_MASK (~0UL >> 11) 1582#define __SWP_OFFSET_MASK ((1UL << 52) - 1)
1583#define __SWP_OFFSET_SHIFT 12
1584#define __SWP_TYPE_MASK ((1UL << 5) - 1)
1585#define __SWP_TYPE_SHIFT 2
1621 1586
1622static inline pte_t mk_swap_pte(unsigned long type, unsigned long offset) 1587static inline pte_t mk_swap_pte(unsigned long type, unsigned long offset)
1623{ 1588{
1624 pte_t pte; 1589 pte_t pte;
1625 offset &= __SWP_OFFSET_MASK; 1590
1626 pte_val(pte) = _PAGE_INVALID | _PAGE_TYPE | ((type & 0x1f) << 2) | 1591 pte_val(pte) = _PAGE_INVALID | _PAGE_PROTECT;
1627 ((offset & 1UL) << 7) | ((offset & ~1UL) << 11); 1592 pte_val(pte) |= (offset & __SWP_OFFSET_MASK) << __SWP_OFFSET_SHIFT;
1593 pte_val(pte) |= (type & __SWP_TYPE_MASK) << __SWP_TYPE_SHIFT;
1628 return pte; 1594 return pte;
1629} 1595}
1630 1596
1631#define __swp_type(entry) (((entry).val >> 2) & 0x1f) 1597static inline unsigned long __swp_type(swp_entry_t entry)
1632#define __swp_offset(entry) (((entry).val >> 11) | (((entry).val >> 7) & 1)) 1598{
1633#define __swp_entry(type,offset) ((swp_entry_t) { pte_val(mk_swap_pte((type),(offset))) }) 1599 return (entry.val >> __SWP_TYPE_SHIFT) & __SWP_TYPE_MASK;
1600}
1601
1602static inline unsigned long __swp_offset(swp_entry_t entry)
1603{
1604 return (entry.val >> __SWP_OFFSET_SHIFT) & __SWP_OFFSET_MASK;
1605}
1606
1607static inline swp_entry_t __swp_entry(unsigned long type, unsigned long offset)
1608{
1609 return (swp_entry_t) { pte_val(mk_swap_pte(type, offset)) };
1610}
1634 1611
1635#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) 1612#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
1636#define __swp_entry_to_pte(x) ((pte_t) { (x).val }) 1613#define __swp_entry_to_pte(x) ((pte_t) { (x).val })