author     Matthew Wilcox <mawilcox@microsoft.com>          2018-06-07 20:08:15 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>   2018-06-07 20:34:37 -0400
commit     620b4e903179d58342503fa09d9c680d93bf7db8 (patch)
tree       f40077235c38641c7caef1cf03289cb65328514a
parent     e67d4ca79aaf9d13a00d229b1b1c96b86828e8ba (diff)
s390: use _refcount for pgtables
Patch series "Rearrange struct page", v6.

As presented at LSFMM, this patch-set rearranges struct page to give
more contiguous usable space to users who have allocated a struct page
for their own purposes.  For a graphical view of before-and-after, see
the first two tabs of
https://docs.google.com/spreadsheets/d/1tvCszs_7FXrjei9_mtFiKV6nW1FLnYyvPvW-qNZhdog/edit?usp=sharing

Highlights:

 - deferred_list now really exists in struct page instead of just a comment.
 - hmm_data also exists in struct page instead of being a nasty hack.
 - x86's PGD pages have a real pointer to the mm_struct.
 - VMalloc pages now have all sorts of extra information stored in them
   to help with debugging and tuning.
 - rcu_head is no longer tied to slab in case anyone else wants to free
   pages by RCU.
 - slub's counters no longer share space with _refcount.
 - slub's freelist+counters are now naturally dword aligned.
 - slub loses a parameter to a lot of functions and a sysfs file.

This patch (of 17):

s390 borrows the storage used for _mapcount in struct page in order to
account whether the bottom or top half is being used for 2kB page
tables.  I want to use that for something else, so use the top byte of
_refcount instead of the bottom byte of _mapcount.  _refcount may
temporarily be incremented by other CPUs that see a stale pointer to
this page in the page cache, but each CPU can only increment it by one,
and there are no systems with 2^24 CPUs today, so they will not change
the upper byte of _refcount.  We do have to be a little careful not to
lose any of their writes (as they will subsequently decrement the
counter).

Link: http://lkml.kernel.org/r/20180518194519.3820-2-willy@infradead.org
Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
Acked-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: "Kirill A . Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Lai Jiangshan <jiangshanlai@gmail.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Jérôme Glisse <jglisse@redhat.com>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
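For readers following along, here is a minimal user-space sketch of the scheme
the patch moves to.  It is an illustration only: frag_toggle(), the plain C11
atomics and the printed values are assumptions of this sketch, not kernel API.
It shows why the fragment bits live in the top byte and are flipped with an
atomic XOR rather than written with atomic_set(): a concurrent increment of the
low 24 bits by another CPU is preserved instead of being overwritten.

/*
 * Sketch of the accounting trick: the low 24 bits stay a normal
 * reference count that other CPUs may speculatively bump; the top
 * byte carries two "2K fragment in use" bits.  Toggling those bits
 * with an atomic XOR (mirroring s390's atomic_xor_bits(), which
 * returns the new value) never clobbers a concurrent increment of
 * the low bits, whereas atomic_set(&page->_mapcount, ...) style
 * writes to a shared counter would.
 */
#include <stdatomic.h>
#include <stdio.h>

static atomic_uint refcount = 1;	/* bits 0-23: refcount, bits 24-25: fragment bits */

/* Toggle one fragment bit and return the top byte after the toggle. */
static unsigned int frag_toggle(unsigned int bit)
{
	unsigned int old = atomic_fetch_xor(&refcount, 1u << (bit + 24));
	return (old ^ (1u << (bit + 24))) >> 24;
}

int main(void)
{
	frag_toggle(0);				/* allocate the first 2K half  */
	frag_toggle(1);				/* allocate the second 2K half */
	atomic_fetch_add(&refcount, 1);		/* another CPU takes a speculative reference */
	unsigned int mask = frag_toggle(0);	/* free the first half again   */
	printf("fragments still in use: %u, refcount: %u\n",
	       mask & 3, atomic_load(&refcount) & 0xffffffu);
	return 0;
}

This prints "fragments still in use: 2, refcount: 2": the second half is still
allocated and the speculative reference survives the toggles, which is exactly
the property that lets the patched page_table_free() trust the top-byte mask it
reads back from atomic_xor_bits().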
-rw-r--r--   arch/s390/mm/pgalloc.c   21
1 file changed, 12 insertions(+), 9 deletions(-)
diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c
index 562f72955956..84bd6329a88d 100644
--- a/arch/s390/mm/pgalloc.c
+++ b/arch/s390/mm/pgalloc.c
@@ -190,14 +190,15 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
 	if (!list_empty(&mm->context.pgtable_list)) {
 		page = list_first_entry(&mm->context.pgtable_list,
 					struct page, lru);
-		mask = atomic_read(&page->_mapcount);
+		mask = atomic_read(&page->_refcount) >> 24;
 		mask = (mask | (mask >> 4)) & 3;
 		if (mask != 3) {
 			table = (unsigned long *) page_to_phys(page);
 			bit = mask & 1;		/* =1 -> second 2K */
 			if (bit)
 				table += PTRS_PER_PTE;
-			atomic_xor_bits(&page->_mapcount, 1U << bit);
+			atomic_xor_bits(&page->_refcount,
+					1U << (bit + 24));
 			list_del(&page->lru);
 		}
 	}
@@ -218,12 +219,12 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
 	table = (unsigned long *) page_to_phys(page);
 	if (mm_alloc_pgste(mm)) {
 		/* Return 4K page table with PGSTEs */
-		atomic_set(&page->_mapcount, 3);
+		atomic_xor_bits(&page->_refcount, 3 << 24);
 		memset64((u64 *)table, _PAGE_INVALID, PTRS_PER_PTE);
 		memset64((u64 *)table + PTRS_PER_PTE, 0, PTRS_PER_PTE);
 	} else {
 		/* Return the first 2K fragment of the page */
-		atomic_set(&page->_mapcount, 1);
+		atomic_xor_bits(&page->_refcount, 1 << 24);
 		memset64((u64 *)table, _PAGE_INVALID, 2 * PTRS_PER_PTE);
 		spin_lock_bh(&mm->context.lock);
 		list_add(&page->lru, &mm->context.pgtable_list);
@@ -242,7 +243,8 @@ void page_table_free(struct mm_struct *mm, unsigned long *table)
 		/* Free 2K page table fragment of a 4K page */
 		bit = (__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t));
 		spin_lock_bh(&mm->context.lock);
-		mask = atomic_xor_bits(&page->_mapcount, 1U << bit);
+		mask = atomic_xor_bits(&page->_refcount, 1U << (bit + 24));
+		mask >>= 24;
 		if (mask & 3)
 			list_add(&page->lru, &mm->context.pgtable_list);
 		else
@@ -253,7 +255,6 @@ void page_table_free(struct mm_struct *mm, unsigned long *table)
 	}
 
 	pgtable_page_dtor(page);
-	atomic_set(&page->_mapcount, -1);
 	__free_page(page);
 }
 
@@ -274,7 +275,8 @@ void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table,
 	}
 	bit = (__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t));
 	spin_lock_bh(&mm->context.lock);
-	mask = atomic_xor_bits(&page->_mapcount, 0x11U << bit);
+	mask = atomic_xor_bits(&page->_refcount, 0x11U << (bit + 24));
+	mask >>= 24;
 	if (mask & 3)
 		list_add_tail(&page->lru, &mm->context.pgtable_list);
 	else
@@ -296,12 +298,13 @@ static void __tlb_remove_table(void *_table)
 		break;
 	case 1:		/* lower 2K of a 4K page table */
 	case 2:		/* higher 2K of a 4K page table */
-		if (atomic_xor_bits(&page->_mapcount, mask << 4) != 0)
+		mask = atomic_xor_bits(&page->_refcount, mask << (4 + 24));
+		mask >>= 24;
+		if (mask != 0)
 			break;
 		/* fallthrough */
 	case 3:		/* 4K page table with pgstes */
 		pgtable_page_dtor(page);
-		atomic_set(&page->_mapcount, -1);
 		__free_page(page);
 		break;
 	}
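A note on the encoding this patch shifts into the top byte (unchanged in
meaning from the old _mapcount scheme, as I read the code): the low nibble of
that byte marks 2K halves currently holding a page table, while the high nibble
marks halves queued for RCU-delayed removal.  That is why page_table_free_rcu()
toggles 0x11U << (bit + 24) and page_table_alloc() folds both nibbles together
with (mask | (mask >> 4)) & 3.  A small self-contained illustration, in plain C
with example values of my choosing:

/*
 * Illustration only: how the two nibbles of the top byte combine.
 * Low nibble  (bits 24-25): 2K halves currently holding a page table.
 * High nibble (bits 28-29): 2K halves queued for RCU-delayed free.
 */
#include <stdio.h>

int main(void)
{
	unsigned int refcount = (0x21u << 24) | 1;	/* half 0 in use, half 1 pending free, refcount 1 */
	unsigned int mask = refcount >> 24;

	/* Same test as page_table_alloc(): a half is unavailable if it is
	 * either in use or still waiting for its RCU-delayed free. */
	mask = (mask | (mask >> 4)) & 3;
	printf("unavailable halves: 0x%x\n", mask);	/* prints 0x3: neither half can be handed out */
	return 0;
}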