| author | Matthew Wilcox <mawilcox@microsoft.com> | 2018-06-07 20:08:15 -0400 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-06-07 20:34:37 -0400 |
| commit | 620b4e903179d58342503fa09d9c680d93bf7db8 (patch) | |
| tree | f40077235c38641c7caef1cf03289cb65328514a | |
| parent | e67d4ca79aaf9d13a00d229b1b1c96b86828e8ba (diff) | |
s390: use _refcount for pgtables
Patch series "Rearrange struct page", v6.
As presented at LSFMM, this patch-set rearranges struct page to give
more contiguous usable space to users who have allocated a struct page
for their own purposes. For a graphical view of before-and-after, see
the first two tabs of
https://docs.google.com/spreadsheets/d/1tvCszs_7FXrjei9_mtFiKV6nW1FLnYyvPvW-qNZhdog/edit?usp=sharing
Highlights:
- deferred_list now really exists in struct page instead of just a comment.
- hmm_data also exists in struct page instead of being a nasty hack.
- x86's PGD pages have a real pointer to the mm_struct.
- VMalloc pages now have all sorts of extra information stored in them
to help with debugging and tuning.
- rcu_head is no longer tied to slab in case anyone else wants to
free pages by RCU.
- slub's counters no longer share space with _refcount.
- slub's freelist+counters are now naturally dword aligned.
- slub loses a parameter to a lot of functions and a sysfs file.
This patch (of 17):
s390 borrows the storage used for _mapcount in struct page in order to
account whether the bottom or top half is being used for 2kB page tables.
I want to use that for something else, so use the top byte of _refcount
instead of the bottom byte of _mapcount. _refcount may temporarily be
incremented by other CPUs that see a stale pointer to this page in the
page cache, but each CPU can only increment it by one, and there are no
systems with 2^24 CPUs today, so they will not change the upper byte of
_refcount. We do have to be a little careful not to lose any of their
writes (as they will subsequently decrement the counter).
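The bit layout can be illustrated with a small standalone model (an editorial sketch, not the kernel code; `frag_xor_bits` is a made-up stand-in for s390's `atomic_xor_bits()` helper, assumed here to return the new counter value):

```c
/*
 * Standalone model of the trick described above: the low 24 bits of
 * _refcount keep working as an ordinary reference count, while bits
 * 24-25 record which 2K halves of the page are used as page tables.
 * Build with: cc -std=c11 refcount_model.c
 */
#include <assert.h>
#include <stdatomic.h>
#include <stdio.h>

#define FRAG_SHIFT 24			/* fragment bits live in the top byte */
#define FRAG_MASK  (3u << FRAG_SHIFT)

static atomic_uint refcount = 1;	/* stands in for page->_refcount */

/* Made-up stand-in for s390's atomic_xor_bits(): toggle the given bits
 * and return the new value of the counter. */
static unsigned int frag_xor_bits(atomic_uint *v, unsigned int bits)
{
	return atomic_fetch_xor(v, bits) ^ bits;
}

int main(void)
{
	/* Hand out the first 2K fragment: set bit 24. */
	unsigned int val = frag_xor_bits(&refcount, 1u << FRAG_SHIFT);

	printf("halves in use: %#x\n", (val & FRAG_MASK) >> FRAG_SHIFT);

	/*
	 * A speculative page-cache lookup grabs and drops a reference;
	 * it only changes the low 24 bits, so the fragment bits survive
	 * (no system has 2^24 CPUs to overflow into bit 24).
	 */
	atomic_fetch_add(&refcount, 1);
	atomic_fetch_sub(&refcount, 1);
	assert(((atomic_load(&refcount) & FRAG_MASK) >> FRAG_SHIFT) == 0x1);

	/* Free the fragment again: clear bit 24. */
	val = frag_xor_bits(&refcount, 1u << FRAG_SHIFT);
	printf("halves in use: %#x\n", (val & FRAG_MASK) >> FRAG_SHIFT);
	return 0;
}
```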
Link: http://lkml.kernel.org/r/20180518194519.3820-2-willy@infradead.org
Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
Acked-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: "Kirill A . Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Lai Jiangshan <jiangshanlai@gmail.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Jérôme Glisse <jglisse@redhat.com>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--  arch/s390/mm/pgalloc.c | 21 ++++++++++++---------
1 file changed, 12 insertions(+), 9 deletions(-)
diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c
index 562f72955956..84bd6329a88d 100644
--- a/arch/s390/mm/pgalloc.c
+++ b/arch/s390/mm/pgalloc.c
@@ -190,14 +190,15 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
 		if (!list_empty(&mm->context.pgtable_list)) {
 			page = list_first_entry(&mm->context.pgtable_list,
 						struct page, lru);
-			mask = atomic_read(&page->_mapcount);
+			mask = atomic_read(&page->_refcount) >> 24;
 			mask = (mask | (mask >> 4)) & 3;
 			if (mask != 3) {
 				table = (unsigned long *) page_to_phys(page);
 				bit = mask & 1;		/* =1 -> second 2K */
 				if (bit)
 					table += PTRS_PER_PTE;
-				atomic_xor_bits(&page->_mapcount, 1U << bit);
+				atomic_xor_bits(&page->_refcount,
+							1U << (bit + 24));
 				list_del(&page->lru);
 			}
 		}
@@ -218,12 +219,12 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
 	table = (unsigned long *) page_to_phys(page);
 	if (mm_alloc_pgste(mm)) {
 		/* Return 4K page table with PGSTEs */
-		atomic_set(&page->_mapcount, 3);
+		atomic_xor_bits(&page->_refcount, 3 << 24);
 		memset64((u64 *)table, _PAGE_INVALID, PTRS_PER_PTE);
 		memset64((u64 *)table + PTRS_PER_PTE, 0, PTRS_PER_PTE);
 	} else {
 		/* Return the first 2K fragment of the page */
-		atomic_set(&page->_mapcount, 1);
+		atomic_xor_bits(&page->_refcount, 1 << 24);
 		memset64((u64 *)table, _PAGE_INVALID, 2 * PTRS_PER_PTE);
 		spin_lock_bh(&mm->context.lock);
 		list_add(&page->lru, &mm->context.pgtable_list);
@@ -242,7 +243,8 @@ void page_table_free(struct mm_struct *mm, unsigned long *table)
 		/* Free 2K page table fragment of a 4K page */
 		bit = (__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t));
 		spin_lock_bh(&mm->context.lock);
-		mask = atomic_xor_bits(&page->_mapcount, 1U << bit);
+		mask = atomic_xor_bits(&page->_refcount, 1U << (bit + 24));
+		mask >>= 24;
 		if (mask & 3)
 			list_add(&page->lru, &mm->context.pgtable_list);
 		else
@@ -253,7 +255,6 @@ void page_table_free(struct mm_struct *mm, unsigned long *table)
 	}
 
 	pgtable_page_dtor(page);
-	atomic_set(&page->_mapcount, -1);
 	__free_page(page);
 }
 
@@ -274,7 +275,8 @@ void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table,
 	}
 	bit = (__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t));
 	spin_lock_bh(&mm->context.lock);
-	mask = atomic_xor_bits(&page->_mapcount, 0x11U << bit);
+	mask = atomic_xor_bits(&page->_refcount, 0x11U << (bit + 24));
+	mask >>= 24;
 	if (mask & 3)
 		list_add_tail(&page->lru, &mm->context.pgtable_list);
 	else
@@ -296,12 +298,13 @@ static void __tlb_remove_table(void *_table)
 		break;
 	case 1:		/* lower 2K of a 4K page table */
 	case 2:		/* higher 2K of a 4K page table */
-		if (atomic_xor_bits(&page->_mapcount, mask << 4) != 0)
+		mask = atomic_xor_bits(&page->_refcount, mask << (4 + 24));
+		mask >>= 24;
+		if (mask != 0)
 			break;
 		/* fallthrough */
 	case 3:		/* 4K page table with pgstes */
 		pgtable_page_dtor(page);
-		atomic_set(&page->_mapcount, -1);
 		__free_page(page);
 		break;
 	}
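For readers following the `mask = (mask | (mask >> 4)) & 3` line in the first hunk: the top byte of _refcount now holds two nibbles, with bits 0-1 of that byte marking 2K halves handed out as page tables and bits 4-5 marking halves queued for RCU removal; folding the nibbles keeps a pending half from being handed out again. A minimal illustration (assumed names, not kernel code):

```c
/*
 * Illustration of the nibble layout kept in the top byte of _refcount:
 * bits 0-1 of that byte mark 2K halves in use as page tables, bits 4-5
 * mark halves queued for RCU removal.  page_table_alloc() folds the two
 * nibbles so a pending half is never reused.
 */
#include <stdio.h>

static unsigned int busy_halves(unsigned int refcount)
{
	unsigned int mask = refcount >> 24;	/* the top byte */

	return (mask | (mask >> 4)) & 3;	/* in use OR pending */
}

int main(void)
{
	/* Half 0 in use (bit 24), half 1 pending RCU free (bit 29). */
	unsigned int refcount = (1u << 24) | (1u << (1 + 4 + 24));

	printf("busy mask = %u\n", busy_halves(refcount));	/* prints 3 */
	return 0;
}
```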
