author		Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>	2014-05-06 14:01:36 -0400
committer	Alexander Graf <agraf@suse.de>	2014-05-30 08:26:24 -0400
commit		1f365bb0de12da4a9ef8e56ffba2218d9a026011 (patch)
tree		29fe09a66847f9e68d066e40797467aa5a8bd375
parent		792fc49787cb7df13f1c38d3e25c863e1c3a6bb2 (diff)
KVM: PPC: BOOK3S: HV: Add mixed page-size support for guest
On recent IBM Power CPUs, while the hashed page table is looked up using
the page size from the segmentation hardware (i.e. the SLB), it is
possible to have the HPT entry indicate a larger page size. Thus for
example it is possible to put a 16MB page in a 64kB segment, but since
the hash lookup is done using a 64kB page size, it may be necessary to
put multiple entries in the HPT for a single 16MB page. This capability
is called mixed page-size segment (MPSS). With MPSS, there are two
relevant page sizes: the base page size, which is the size used in
searching the HPT, and the actual page size, which is the size indicated
in the HPT entry. (Note that the actual page size is always >= the base
page size.)

We use the "ibm,segment-page-sizes" device tree node to advertise MPSS
support to the PAPR guest. The penc encoding indicates whether we
support a specific combination of base page size and actual page size in
the same segment. We also use the penc value in the LP encoding of the
HPTE entry.

This patch exposes MPSS support to the KVM guest by advertising the
feature via "ibm,segment-page-sizes". It also adds the necessary changes
to decode the base page size and the actual page size correctly from the
HPTE entry.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
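To make the penc mechanism concrete, here is a small userspace sketch of
the LP-field decode that the patch's __hpte_actual_psize() performs. It
is a standalone illustration, not kernel code: the page-size table is a
hypothetical three-entry subset, and the penc values (1 for 64K-in-64K,
0x38 for 16M-in-64K) are commonly quoted PAPR example encodings, taken
here as assumptions.

	/* mpss_decode.c - standalone sketch of the MPSS LP-field decode.
	 * The psize table is a hypothetical subset (4K/64K/16M) and the
	 * penc values are assumptions for illustration only. */
	#include <stdio.h>

	#define LP_SHIFT 12	/* LP field starts at bit 12 of the second dword */
	#define LP_BITS  8	/* and is 8 bits wide */

	struct psize_def {
		int shift;	/* log2 of the page size, 0 = unsupported */
		int penc[3];	/* encoding per actual page size, -1 = invalid */
	};

	/* index 0 = 4K, 1 = 64K, 2 = 16M */
	static const struct psize_def psize_defs[3] = {
		{ 12, { -1, -1, -1   } },	/* 4K base: no MPSS combinations */
		{ 16, { -1,  1, 0x38 } },	/* 64K base: 64K and 16M actual */
		{ 24, { -1, -1,  0   } },	/* 16M base: 16M actual only */
	};

	/* mirrors __hpte_actual_psize(): only the low (shift - LP_SHIFT)
	 * bits of LP carry the encoding; the remaining high ("rrrr...")
	 * bits hold address bits */
	static int actual_psize(unsigned int lp, int base)
	{
		for (int i = 1; i < 3; i++) {
			if (psize_defs[base].penc[i] == -1)
				continue;
			int shift = psize_defs[i].shift - LP_SHIFT;
			if (shift > LP_BITS)
				shift = LP_BITS;
			unsigned int mask = (1u << shift) - 1;
			if ((lp & mask) == (unsigned int)psize_defs[base].penc[i])
				return i;
		}
		return -1;
	}

	int main(void)
	{
		/* a 16M actual page hashed with a 64K base: LP carries 0x38 */
		int a = actual_psize(0x38, 1);
		if (a >= 0)
			printf("base 64K -> actual page size 2^%d bytes\n",
			       psize_defs[a].shift);
		return 0;
	}

With LP = 0x38 the 4-bit comparison for the 64K row fails and the 8-bit
comparison for the 16M row matches, which is exactly how the patch
distinguishes a 16M actual page from a 64K one within the same segment.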
-rw-r--r--	arch/powerpc/include/asm/kvm_book3s_64.h	146
-rw-r--r--	arch/powerpc/kvm/book3s_hv.c	7
2 files changed, 130 insertions(+), 23 deletions(-)
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index 51388befeddb..fddb72b48ce9 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -77,34 +77,122 @@ static inline long try_lock_hpte(unsigned long *hpte, unsigned long bits)
 	return old == 0;
 }
 
+static inline int __hpte_actual_psize(unsigned int lp, int psize)
+{
+	int i, shift;
+	unsigned int mask;
+
+	/* start from 1 ignoring MMU_PAGE_4K */
+	for (i = 1; i < MMU_PAGE_COUNT; i++) {
+
+		/* invalid penc */
+		if (mmu_psize_defs[psize].penc[i] == -1)
+			continue;
+		/*
+		 * encoding bits per actual page size
+		 *        PTE LP     actual page size
+		 *    rrrr rrrz		>=8KB
+		 *    rrrr rrzz		>=16KB
+		 *    rrrr rzzz		>=32KB
+		 *    rrrr zzzz		>=64KB
+		 * .......
+		 */
+		shift = mmu_psize_defs[i].shift - LP_SHIFT;
+		if (shift > LP_BITS)
+			shift = LP_BITS;
+		mask = (1 << shift) - 1;
+		if ((lp & mask) == mmu_psize_defs[psize].penc[i])
+			return i;
+	}
+	return -1;
+}
+
 static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
 					     unsigned long pte_index)
 {
-	unsigned long rb, va_low;
+	int b_psize, a_psize;
+	unsigned int penc;
+	unsigned long rb = 0, va_low, sllp;
+	unsigned int lp = (r >> LP_SHIFT) & ((1 << LP_BITS) - 1);
+
+	if (!(v & HPTE_V_LARGE)) {
+		/* both base and actual psize is 4k */
+		b_psize = MMU_PAGE_4K;
+		a_psize = MMU_PAGE_4K;
+	} else {
+		for (b_psize = 0; b_psize < MMU_PAGE_COUNT; b_psize++) {
+
+			/* valid entries have a shift value */
+			if (!mmu_psize_defs[b_psize].shift)
+				continue;
 
+			a_psize = __hpte_actual_psize(lp, b_psize);
+			if (a_psize != -1)
+				break;
+		}
+	}
+	/*
+	 * Ignore the top 14 bits of va
+	 * v have top two bits covering segment size, hence move
+	 * by 16 bits, Also clear the lower HPTE_V_AVPN_SHIFT (7) bits.
+	 * AVA field in v also have the lower 23 bits ignored.
+	 * For base page size 4K we need 14 .. 65 bits (so need to
+	 * collect extra 11 bits)
+	 * For others we need 14..14+i
+	 */
+	/* This covers 14..54 bits of va */
 	rb = (v & ~0x7fUL) << 16;		/* AVA field */
+	/*
+	 * AVA in v had cleared lower 23 bits. We need to derive
+	 * that from pteg index
+	 */
 	va_low = pte_index >> 3;
 	if (v & HPTE_V_SECONDARY)
 		va_low = ~va_low;
-	/* xor vsid from AVA */
+	/*
+	 * get the vpn bits from va_low using reverse of hashing.
+	 * In v we have va with 23 bits dropped and then left shifted
+	 * HPTE_V_AVPN_SHIFT (7) bits. Now to find vsid we need
+	 * right shift it with (SID_SHIFT - (23 - 7))
+	 */
 	if (!(v & HPTE_V_1TB_SEG))
-		va_low ^= v >> 12;
+		va_low ^= v >> (SID_SHIFT - 16);
 	else
-		va_low ^= v >> 24;
+		va_low ^= v >> (SID_SHIFT_1T - 16);
 	va_low &= 0x7ff;
-	if (v & HPTE_V_LARGE) {
-		rb |= 1;			/* L field */
-		if (cpu_has_feature(CPU_FTR_ARCH_206) &&
-		    (r & 0xff000)) {
-			/* non-16MB large page, must be 64k */
-			/* (masks depend on page size) */
-			rb |= 0x1000;		/* page encoding in LP field */
-			rb |= (va_low & 0x7f) << 16; /* 7b of VA in AVA/LP field */
-			rb |= ((va_low << 4) & 0xf0);	/* AVAL field (P7 doesn't seem to care) */
-		}
-	} else {
-		/* 4kB page */
-		rb |= (va_low & 0x7ff) << 12;	/* remaining 11b of VA */
+
+	switch (b_psize) {
+	case MMU_PAGE_4K:
+		sllp = ((mmu_psize_defs[a_psize].sllp & SLB_VSID_L) >> 6) |
+			((mmu_psize_defs[a_psize].sllp & SLB_VSID_LP) >> 4);
+		rb |= sllp << 5;	/* AP field */
+		rb |= (va_low & 0x7ff) << 12;	/* remaining 11 bits of AVA */
+		break;
+	default:
+	{
+		int aval_shift;
+		/*
+		 * remaining 7 bits of AVA/LP fields
+		 * Also contain the rr bits of LP
+		 */
+		rb |= (va_low & 0x7f) << 16;
+		/*
+		 * Now clear not needed LP bits based on actual psize
+		 */
+		rb &= ~((1ul << mmu_psize_defs[a_psize].shift) - 1);
+		/*
+		 * AVAL field 58..77 - base_page_shift bits of va
+		 * we have space for 58..64 bits, Missing bits should
+		 * be zero filled. +1 is to take care of L bit shift
+		 */
+		aval_shift = 64 - (77 - mmu_psize_defs[b_psize].shift) + 1;
+		rb |= ((va_low << aval_shift) & 0xfe);
+
+		rb |= 1;		/* L field */
+		penc = mmu_psize_defs[b_psize].penc[a_psize];
+		rb |= penc << 12;	/* LP field */
+		break;
+	}
 	}
 	rb |= (v >> 54) & 0x300;		/* B field */
 	return rb;
@@ -112,14 +200,26 @@ static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
 
 static inline unsigned long hpte_page_size(unsigned long h, unsigned long l)
 {
+	int size, a_psize;
+	/* Look at the 8 bit LP value */
+	unsigned int lp = (l >> LP_SHIFT) & ((1 << LP_BITS) - 1);
+
 	/* only handle 4k, 64k and 16M pages for now */
 	if (!(h & HPTE_V_LARGE))
-		return 1ul << 12;		/* 4k page */
-	if ((l & 0xf000) == 0x1000 && cpu_has_feature(CPU_FTR_ARCH_206))
-		return 1ul << 16;		/* 64k page */
-	if ((l & 0xff000) == 0)
-		return 1ul << 24;		/* 16M page */
-	return 0;				/* error */
+		return 1ul << 12;
+	else {
+		for (size = 0; size < MMU_PAGE_COUNT; size++) {
+			/* valid entries have a shift value */
+			if (!mmu_psize_defs[size].shift)
+				continue;
+
+			a_psize = __hpte_actual_psize(lp, size);
+			if (a_psize != -1)
+				return 1ul << mmu_psize_defs[a_psize].shift;
+		}
+
+	}
+	return 0;
 }
 
 static inline unsigned long hpte_rpn(unsigned long ptel, unsigned long psize)
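As a quick sanity check on the AVAL arithmetic in the default case of
compute_tlbie_rb() above: a 64kB base page (shift 16) gives aval_shift =
64 - (77 - 16) + 1 = 4, while a 16MB base page (shift 24) gives
aval_shift = 12, after which the 0xfe mask leaves nothing, as a 16MB
base needs no AVAL bits. A throwaway userspace check of that arithmetic
(the va_low sample value is arbitrary):

	/* aval_check.c - arithmetic check of the AVAL shift used in
	 * compute_tlbie_rb(); the va_low value is an arbitrary sample. */
	#include <stdio.h>

	int main(void)
	{
		int shifts[] = { 16, 24 };	/* 64K and 16M base page shifts */
		unsigned long va_low = 0x5a5;	/* arbitrary 11-bit sample */

		for (int i = 0; i < 2; i++) {
			int aval_shift = 64 - (77 - shifts[i]) + 1;
			unsigned long aval = (va_low << aval_shift) & 0xfe;
			printf("base shift %2d: aval_shift = %2d, AVAL byte = 0x%02lx\n",
			       shifts[i], aval_shift, aval);
		}
		return 0;
	}

This prints aval_shift = 4 with a nonzero AVAL byte for the 64K base and
aval_shift = 12 with AVAL byte 0x00 for the 16M base.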
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 3a9456165224..aba05bbb3e74 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -1930,6 +1930,13 @@ static void kvmppc_add_seg_page_size(struct kvm_ppc_one_seg_page_size **sps,
 	 * support pte_enc here
 	 */
 	(*sps)->enc[0].pte_enc = def->penc[linux_psize];
+	/*
+	 * Add 16MB MPSS support if host supports it
+	 */
+	if (linux_psize != MMU_PAGE_16M && def->penc[MMU_PAGE_16M] != -1) {
+		(*sps)->enc[1].page_shift = 24;
+		(*sps)->enc[1].pte_enc = def->penc[MMU_PAGE_16M];
+	}
 	(*sps)++;
 }
 
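The net effect of the hunk above, on a host that supports 16MB MPSS, is
that the 64kB entry advertised via "ibm,segment-page-sizes" grows a
second (page_shift, pte_enc) pair. A standalone sketch of the resulting
shape; the field names mirror the kvm_ppc_one_seg_page_size encoding
array, and the penc values are the same illustrative assumptions used
earlier:

	/* seg_enc_sketch.c - approximate shape of the 64K segment entry
	 * that kvmppc_add_seg_page_size() builds once MPSS is advertised;
	 * penc values are assumptions for illustration. */
	#include <stdio.h>

	struct one_enc {
		unsigned int page_shift;	/* log2 of the actual page size */
		unsigned int pte_enc;		/* penc for this base/actual pair */
	};

	int main(void)
	{
		/* enc[0]: native 64K pages; enc[1]: 16M MPSS pages */
		struct one_enc enc[2] = {
			{ .page_shift = 16, .pte_enc = 0x1  },
			{ .page_shift = 24, .pte_enc = 0x38 },
		};

		for (int i = 0; i < 2; i++)
			printf("enc[%d]: actual page 2^%u, pte_enc 0x%x\n",
			       i, enc[i].page_shift, enc[i].pte_enc);
		return 0;
	}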