diff options
author | Martin Schwidefsky <schwidefsky@de.ibm.com> | 2015-04-22 07:55:59 -0400 |
---|---|---|
committer | Martin Schwidefsky <schwidefsky@de.ibm.com> | 2015-04-23 10:56:20 -0400 |
commit | a1c843b82541fdd4c4644607c942dabc7c7e6f6c (patch) | |
tree | e610fdda8945bfabde597f38b755af7ba2f6f02d /arch | |
parent | b7d14f3a92223c3f5e52e9f20c74cb96dc130e87 (diff) |
s390/mm: change swap pte encoding and pgtable cleanup
After the file ptes have been removed the bit combination used to
encode non-linear mappings can be reused for the swap ptes. This
frees up a precious pte software bit. Reflect the change in the
swap encoding in the comments and do some cleanup while we are
at it.
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Diffstat (limited to 'arch')
-rw-r--r-- | arch/s390/include/asm/pgtable.h | 158 | ||||
-rw-r--r-- | arch/s390/mm/hugetlbpage.c | 62 |
2 files changed, 97 insertions, 123 deletions
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 1fba63997d50..fc642399b489 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h | |||
@@ -12,12 +12,9 @@ | |||
12 | #define _ASM_S390_PGTABLE_H | 12 | #define _ASM_S390_PGTABLE_H |
13 | 13 | ||
14 | /* | 14 | /* |
15 | * The Linux memory management assumes a three-level page table setup. For | 15 | * The Linux memory management assumes a three-level page table setup. |
16 | * s390 31 bit we "fold" the mid level into the top-level page table, so | 16 | * For s390 64 bit we use up to four of the five levels the hardware |
17 | * that we physically have the same two-level page table as the s390 mmu | 17 | * provides (region first tables are not used). |
18 | * expects in 31 bit mode. For s390 64 bit we use three of the five levels | ||
19 | * the hardware provides (region first and region second tables are not | ||
20 | * used). | ||
21 | * | 18 | * |
22 | * The "pgd_xxx()" functions are trivial for a folded two-level | 19 | * The "pgd_xxx()" functions are trivial for a folded two-level |
23 | * setup: the pgd is never bad, and a pmd always exists (as it's folded | 20 | * setup: the pgd is never bad, and a pmd always exists (as it's folded |
@@ -101,8 +98,8 @@ extern unsigned long zero_page_mask; | |||
101 | 98 | ||
102 | #ifndef __ASSEMBLY__ | 99 | #ifndef __ASSEMBLY__ |
103 | /* | 100 | /* |
104 | * The vmalloc and module area will always be on the topmost area of the kernel | 101 | * The vmalloc and module area will always be on the topmost area of the |
105 | * mapping. We reserve 96MB (31bit) / 128GB (64bit) for vmalloc and modules. | 102 | * kernel mapping. We reserve 128GB (64bit) for vmalloc and modules. |
106 | * On 64 bit kernels we have a 2GB area at the top of the vmalloc area where | 103 | * On 64 bit kernels we have a 2GB area at the top of the vmalloc area where |
107 | * modules will reside. That makes sure that inter module branches always | 104 | * modules will reside. That makes sure that inter module branches always |
108 | * happen without trampolines and in addition the placement within a 2GB frame | 105 | * happen without trampolines and in addition the placement within a 2GB frame |
@@ -131,38 +128,6 @@ static inline int is_module_addr(void *addr) | |||
131 | } | 128 | } |
132 | 129 | ||
133 | /* | 130 | /* |
134 | * A 31 bit pagetable entry of S390 has following format: | ||
135 | * | PFRA | | OS | | ||
136 | * 0 0IP0 | ||
137 | * 00000000001111111111222222222233 | ||
138 | * 01234567890123456789012345678901 | ||
139 | * | ||
140 | * I Page-Invalid Bit: Page is not available for address-translation | ||
141 | * P Page-Protection Bit: Store access not possible for page | ||
142 | * | ||
143 | * A 31 bit segmenttable entry of S390 has following format: | ||
144 | * | P-table origin | |PTL | ||
145 | * 0 IC | ||
146 | * 00000000001111111111222222222233 | ||
147 | * 01234567890123456789012345678901 | ||
148 | * | ||
149 | * I Segment-Invalid Bit: Segment is not available for address-translation | ||
150 | * C Common-Segment Bit: Segment is not private (PoP 3-30) | ||
151 | * PTL Page-Table-Length: Page-table length (PTL+1*16 entries -> up to 256) | ||
152 | * | ||
153 | * The 31 bit segmenttable origin of S390 has following format: | ||
154 | * | ||
155 | * |S-table origin | | STL | | ||
156 | * X **GPS | ||
157 | * 00000000001111111111222222222233 | ||
158 | * 01234567890123456789012345678901 | ||
159 | * | ||
160 | * X Space-Switch event: | ||
161 | * G Segment-Invalid Bit: * | ||
162 | * P Private-Space Bit: Segment is not private (PoP 3-30) | ||
163 | * S Storage-Alteration: | ||
164 | * STL Segment-Table-Length: Segment-table length (STL+1*16 entries -> up to 2048) | ||
165 | * | ||
166 | * A 64 bit pagetable entry of S390 has following format: | 131 | * A 64 bit pagetable entry of S390 has following format: |
167 | * | PFRA |0IPC| OS | | 132 | * | PFRA |0IPC| OS | |
168 | * 0000000000111111111122222222223333333333444444444455555555556666 | 133 | * 0000000000111111111122222222223333333333444444444455555555556666 |
@@ -220,7 +185,6 @@ static inline int is_module_addr(void *addr) | |||
220 | 185 | ||
221 | /* Software bits in the page table entry */ | 186 | /* Software bits in the page table entry */ |
222 | #define _PAGE_PRESENT 0x001 /* SW pte present bit */ | 187 | #define _PAGE_PRESENT 0x001 /* SW pte present bit */ |
223 | #define _PAGE_TYPE 0x002 /* SW pte type bit */ | ||
224 | #define _PAGE_YOUNG 0x004 /* SW pte young bit */ | 188 | #define _PAGE_YOUNG 0x004 /* SW pte young bit */ |
225 | #define _PAGE_DIRTY 0x008 /* SW pte dirty bit */ | 189 | #define _PAGE_DIRTY 0x008 /* SW pte dirty bit */ |
226 | #define _PAGE_READ 0x010 /* SW pte read bit */ | 190 | #define _PAGE_READ 0x010 /* SW pte read bit */ |
@@ -240,31 +204,34 @@ static inline int is_module_addr(void *addr) | |||
240 | * table lock held. | 204 | * table lock held. |
241 | * | 205 | * |
242 | * The following table gives the different possible bit combinations for | 206 | * The following table gives the different possible bit combinations for |
243 | * the pte hardware and software bits in the last 12 bits of a pte: | 207 | * the pte hardware and software bits in the last 12 bits of a pte |
208 | * (. unassigned bit, x don't care, t swap type): | ||
244 | * | 209 | * |
245 | * 842100000000 | 210 | * 842100000000 |
246 | * 000084210000 | 211 | * 000084210000 |
247 | * 000000008421 | 212 | * 000000008421 |
248 | * .IR...wrdytp | 213 | * .IR.uswrdy.p |
249 | * empty .10...000000 | 214 | * empty .10.00000000 |
250 | * swap .10...xxxx10 | 215 | * swap .11..ttttt.0 |
251 | * file .11...xxxxx0 | 216 | * prot-none, clean, old .11.xx0000.1 |
252 | * prot-none, clean, old .11...000001 | 217 | * prot-none, clean, young .11.xx0001.1 |
253 | * prot-none, clean, young .11...000101 | 218 | * prot-none, dirty, old .10.xx0010.1 |
254 | * prot-none, dirty, old .10...001001 | 219 | * prot-none, dirty, young .10.xx0011.1 |
255 | * prot-none, dirty, young .10...001101 | 220 | * read-only, clean, old .11.xx0100.1 |
256 | * read-only, clean, old .11...010001 | 221 | * read-only, clean, young .01.xx0101.1 |
257 | * read-only, clean, young .01...010101 | 222 | * read-only, dirty, old .11.xx0110.1 |
258 | * read-only, dirty, old .11...011001 | 223 | * read-only, dirty, young .01.xx0111.1 |
259 | * read-only, dirty, young .01...011101 | 224 | * read-write, clean, old .11.xx1100.1 |
260 | * read-write, clean, old .11...110001 | 225 | * read-write, clean, young .01.xx1101.1 |
261 | * read-write, clean, young .01...110101 | 226 | * read-write, dirty, old .10.xx1110.1 |
262 | * read-write, dirty, old .10...111001 | 227 | * read-write, dirty, young .00.xx1111.1 |
263 | * read-write, dirty, young .00...111101 | 228 | * HW-bits: R read-only, I invalid |
229 | * SW-bits: p present, y young, d dirty, r read, w write, s special, | ||
230 | * u unused, l large | ||
264 | * | 231 | * |
265 | * pte_present is true for the bit pattern .xx...xxxxx1, (pte & 0x001) == 0x001 | 232 | * pte_none is true for the bit pattern .10.00000000, pte == 0x400 |
266 | * pte_none is true for the bit pattern .10...xxxx00, (pte & 0x603) == 0x400 | 233 | * pte_swap is true for the bit pattern .11..ttttt.0, (pte & 0x201) == 0x200 |
267 | * pte_swap is true for the bit pattern .10...xxxx10, (pte & 0x603) == 0x402 | 234 | * pte_present is true for the bit pattern .xx.xxxxxx.1, (pte & 0x001) == 0x001 |
268 | */ | 235 | */ |
269 | 236 | ||
270 | /* Bits in the segment/region table address-space-control-element */ | 237 | /* Bits in the segment/region table address-space-control-element */ |
@@ -335,6 +302,8 @@ static inline int is_module_addr(void *addr) | |||
335 | * read-write, dirty, young 11..0...0...11 | 302 | * read-write, dirty, young 11..0...0...11 |
336 | * The segment table origin is used to distinguish empty (origin==0) from | 303 | * The segment table origin is used to distinguish empty (origin==0) from |
337 | * read-write, old segment table entries (origin!=0) | 304 | * read-write, old segment table entries (origin!=0) |
305 | * HW-bits: R read-only, I invalid | ||
306 | * SW-bits: y young, d dirty, r read, w write | ||
338 | */ | 307 | */ |
339 | 308 | ||
340 | #define _SEGMENT_ENTRY_SPLIT_BIT 11 /* THP splitting bit number */ | 309 | #define _SEGMENT_ENTRY_SPLIT_BIT 11 /* THP splitting bit number */ |
@@ -591,10 +560,9 @@ static inline int pte_none(pte_t pte) | |||
591 | 560 | ||
592 | static inline int pte_swap(pte_t pte) | 561 | static inline int pte_swap(pte_t pte) |
593 | { | 562 | { |
594 | /* Bit pattern: (pte & 0x603) == 0x402 */ | 563 | /* Bit pattern: (pte & 0x201) == 0x200 */ |
595 | return (pte_val(pte) & (_PAGE_INVALID | _PAGE_PROTECT | | 564 | return (pte_val(pte) & (_PAGE_PROTECT | _PAGE_PRESENT)) |
596 | _PAGE_TYPE | _PAGE_PRESENT)) | 565 | == _PAGE_PROTECT; |
597 | == (_PAGE_INVALID | _PAGE_TYPE); | ||
598 | } | 566 | } |
599 | 567 | ||
600 | static inline int pte_special(pte_t pte) | 568 | static inline int pte_special(pte_t pte) |
@@ -1595,51 +1563,51 @@ static inline int has_transparent_hugepage(void) | |||
1595 | #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ | 1563 | #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ |
1596 | 1564 | ||
1597 | /* | 1565 | /* |
1598 | * 31 bit swap entry format: | ||
1599 | * A page-table entry has some bits we have to treat in a special way. | ||
1600 | * Bits 0, 20 and 23 have to be zero, otherwise a specification | ||
1601 | * exception will occur instead of a page translation exception. The | ||
1602 | * specification exception has the bad habit not to store necessary | ||
1603 | * information in the lowcore. | ||
1604 | * Bits 21, 22, 30 and 31 are used to indicate the page type. | ||
1605 | * A swap pte is indicated by bit pattern (pte & 0x603) == 0x402 | ||
1606 | * This leaves the bits 1-19 and bits 24-29 to store type and offset. | ||
1607 | * We use the 5 bits from 25-29 for the type and the 20 bits from 1-19 | ||
1608 | * plus 24 for the offset. | ||
1609 | * 0| offset |0110|o|type |00| | ||
1610 | * 0 0000000001111111111 2222 2 22222 33 | ||
1611 | * 0 1234567890123456789 0123 4 56789 01 | ||
1612 | * | ||
1613 | * 64 bit swap entry format: | 1566 | * 64 bit swap entry format: |
1614 | * A page-table entry has some bits we have to treat in a special way. | 1567 | * A page-table entry has some bits we have to treat in a special way. |
1615 | * Bits 52 and 55 have to be zero, otherwise a specification | 1568 | * Bits 52 and 55 have to be zero, otherwise a specification |
1616 | * exception will occur instead of a page translation exception. The | 1569 | * exception will occur instead of a page translation exception. The |
1617 | * specification exception has the bad habit not to store necessary | 1570 | * specification exception has the bad habit not to store necessary |
1618 | * information in the lowcore. | 1571 | * information in the lowcore. |
1619 | * Bits 53, 54, 62 and 63 are used to indicate the page type. | 1572 | * Bits 54 and 63 are used to indicate the page type. |
1620 | * A swap pte is indicated by bit pattern (pte & 0x603) == 0x402 | 1573 | * A swap pte is indicated by bit pattern (pte & 0x201) == 0x200 |
1621 | * This leaves the bits 0-51 and bits 56-61 to store type and offset. | 1574 | * This leaves the bits 0-51 and bits 56-62 to store type and offset. |
1622 | * We use the 5 bits from 57-61 for the type and the 53 bits from 0-51 | 1575 | * We use the 5 bits from 57-61 for the type and the 52 bits from 0-51 |
1623 | * plus 56 for the offset. | 1576 | * for the offset. |
1624 | * | offset |0110|o|type |00| | 1577 | * | offset |01100|type |00| |
1625 | * 0000000000111111111122222222223333333333444444444455 5555 5 55566 66 | 1578 | * |0000000000111111111122222222223333333333444444444455|55555|55566|66| |
1626 | * 0123456789012345678901234567890123456789012345678901 2345 6 78901 23 | 1579 | * |0123456789012345678901234567890123456789012345678901|23456|78901|23| |
1627 | */ | 1580 | */ |
1628 | 1581 | ||
1629 | #define __SWP_OFFSET_MASK (~0UL >> 11) | 1582 | #define __SWP_OFFSET_MASK ((1UL << 52) - 1) |
1583 | #define __SWP_OFFSET_SHIFT 12 | ||
1584 | #define __SWP_TYPE_MASK ((1UL << 5) - 1) | ||
1585 | #define __SWP_TYPE_SHIFT 2 | ||
1630 | 1586 | ||
1631 | static inline pte_t mk_swap_pte(unsigned long type, unsigned long offset) | 1587 | static inline pte_t mk_swap_pte(unsigned long type, unsigned long offset) |
1632 | { | 1588 | { |
1633 | pte_t pte; | 1589 | pte_t pte; |
1634 | offset &= __SWP_OFFSET_MASK; | 1590 | |
1635 | pte_val(pte) = _PAGE_INVALID | _PAGE_TYPE | ((type & 0x1f) << 2) | | 1591 | pte_val(pte) = _PAGE_INVALID | _PAGE_PROTECT; |
1636 | ((offset & 1UL) << 7) | ((offset & ~1UL) << 11); | 1592 | pte_val(pte) |= (offset & __SWP_OFFSET_MASK) << __SWP_OFFSET_SHIFT; |
1593 | pte_val(pte) |= (type & __SWP_TYPE_MASK) << __SWP_TYPE_SHIFT; | ||
1637 | return pte; | 1594 | return pte; |
1638 | } | 1595 | } |
1639 | 1596 | ||
1640 | #define __swp_type(entry) (((entry).val >> 2) & 0x1f) | 1597 | static inline unsigned long __swp_type(swp_entry_t entry) |
1641 | #define __swp_offset(entry) (((entry).val >> 11) | (((entry).val >> 7) & 1)) | 1598 | { |
1642 | #define __swp_entry(type,offset) ((swp_entry_t) { pte_val(mk_swap_pte((type),(offset))) }) | 1599 | return (entry.val >> __SWP_TYPE_SHIFT) & __SWP_TYPE_MASK; |
1600 | } | ||
1601 | |||
1602 | static inline unsigned long __swp_offset(swp_entry_t entry) | ||
1603 | { | ||
1604 | return (entry.val >> __SWP_OFFSET_SHIFT) & __SWP_OFFSET_MASK; | ||
1605 | } | ||
1606 | |||
1607 | static inline swp_entry_t __swp_entry(unsigned long type, unsigned long offset) | ||
1608 | { | ||
1609 | return (swp_entry_t) { pte_val(mk_swap_pte(type, offset)) }; | ||
1610 | } | ||
1643 | 1611 | ||
1644 | #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) | 1612 | #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) |
1645 | #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) | 1613 | #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) |
diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index c67e8bf012b6..e617e74b7be2 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c | |||
@@ -14,20 +14,23 @@ static inline pmd_t __pte_to_pmd(pte_t pte) | |||
14 | 14 | ||
15 | /* | 15 | /* |
16 | * Convert encoding pte bits pmd bits | 16 | * Convert encoding pte bits pmd bits |
17 | * .IR...wrdytp dy..R...I...wr | 17 | * lIR.uswrdy.p dy..R...I...wr |
18 | * empty .10...000000 -> 00..0...1...00 | 18 | * empty 010.000000.0 -> 00..0...1...00 |
19 | * prot-none, clean, old .11...000001 -> 00..1...1...00 | 19 | * prot-none, clean, old 111.000000.1 -> 00..1...1...00 |
20 | * prot-none, clean, young .11...000101 -> 01..1...1...00 | 20 | * prot-none, clean, young 111.000001.1 -> 01..1...1...00 |
21 | * prot-none, dirty, old .10...001001 -> 10..1...1...00 | 21 | * prot-none, dirty, old 111.000010.1 -> 10..1...1...00 |
22 | * prot-none, dirty, young .10...001101 -> 11..1...1...00 | 22 | * prot-none, dirty, young 111.000011.1 -> 11..1...1...00 |
23 | * read-only, clean, old .11...010001 -> 00..1...1...01 | 23 | * read-only, clean, old 111.000100.1 -> 00..1...1...01 |
24 | * read-only, clean, young .01...010101 -> 01..1...0...01 | 24 | * read-only, clean, young 101.000101.1 -> 01..1...0...01 |
25 | * read-only, dirty, old .11...011001 -> 10..1...1...01 | 25 | * read-only, dirty, old 111.000110.1 -> 10..1...1...01 |
26 | * read-only, dirty, young .01...011101 -> 11..1...0...01 | 26 | * read-only, dirty, young 101.000111.1 -> 11..1...0...01 |
27 | * read-write, clean, old .11...110001 -> 00..0...1...11 | 27 | * read-write, clean, old 111.001100.1 -> 00..1...1...11 |
28 | * read-write, clean, young .01...110101 -> 01..0...0...11 | 28 | * read-write, clean, young 101.001101.1 -> 01..1...0...11 |
29 | * read-write, dirty, old .10...111001 -> 10..0...1...11 | 29 | * read-write, dirty, old 110.001110.1 -> 10..0...1...11 |
30 | * read-write, dirty, young .00...111101 -> 11..0...0...11 | 30 | * read-write, dirty, young 100.001111.1 -> 11..0...0...11 |
31 | * HW-bits: R read-only, I invalid | ||
32 | * SW-bits: p present, y young, d dirty, r read, w write, s special, | ||
33 | * u unused, l large | ||
31 | */ | 34 | */ |
32 | if (pte_present(pte)) { | 35 | if (pte_present(pte)) { |
33 | pmd_val(pmd) = pte_val(pte) & PAGE_MASK; | 36 | pmd_val(pmd) = pte_val(pte) & PAGE_MASK; |
@@ -48,20 +51,23 @@ static inline pte_t __pmd_to_pte(pmd_t pmd) | |||
48 | 51 | ||
49 | /* | 52 | /* |
50 | * Convert encoding pmd bits pte bits | 53 | * Convert encoding pmd bits pte bits |
51 | * dy..R...I...wr .IR...wrdytp | 54 | * dy..R...I...wr lIR.uswrdy.p |
52 | * empty 00..0...1...00 -> .10...001100 | 55 | * empty 00..0...1...00 -> 010.000000.0 |
53 | * prot-none, clean, old 00..0...1...00 -> .10...000001 | 56 | * prot-none, clean, old 00..1...1...00 -> 111.000000.1 |
54 | * prot-none, clean, young 01..0...1...00 -> .10...000101 | 57 | * prot-none, clean, young 01..1...1...00 -> 111.000001.1 |
55 | * prot-none, dirty, old 10..0...1...00 -> .10...001001 | 58 | * prot-none, dirty, old 10..1...1...00 -> 111.000010.1 |
56 | * prot-none, dirty, young 11..0...1...00 -> .10...001101 | 59 | * prot-none, dirty, young 11..1...1...00 -> 111.000011.1 |
57 | * read-only, clean, old 00..1...1...01 -> .11...010001 | 60 | * read-only, clean, old 00..1...1...01 -> 111.000100.1 |
58 | * read-only, clean, young 01..1...1...01 -> .11...010101 | 61 | * read-only, clean, young 01..1...0...01 -> 101.000101.1 |
59 | * read-only, dirty, old 10..1...1...01 -> .11...011001 | 62 | * read-only, dirty, old 10..1...1...01 -> 111.000110.1 |
60 | * read-only, dirty, young 11..1...1...01 -> .11...011101 | 63 | * read-only, dirty, young 11..1...0...01 -> 101.000111.1 |
61 | * read-write, clean, old 00..0...1...11 -> .10...110001 | 64 | * read-write, clean, old 00..1...1...11 -> 111.001100.1 |
62 | * read-write, clean, young 01..0...1...11 -> .10...110101 | 65 | * read-write, clean, young 01..1...0...11 -> 101.001101.1 |
63 | * read-write, dirty, old 10..0...1...11 -> .10...111001 | 66 | * read-write, dirty, old 10..0...1...11 -> 110.001110.1 |
64 | * read-write, dirty, young 11..0...1...11 -> .10...111101 | 67 | * read-write, dirty, young 11..0...0...11 -> 100.001111.1 |
68 | * HW-bits: R read-only, I invalid | ||
69 | * SW-bits: p present, y young, d dirty, r read, w write, s special, | ||
70 | * u unused, l large | ||
65 | */ | 71 | */ |
66 | if (pmd_present(pmd)) { | 72 | if (pmd_present(pmd)) { |
67 | pte_val(pte) = pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN_LARGE; | 73 | pte_val(pte) = pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN_LARGE; |