diff options
Diffstat (limited to 'arch/x86/mm/pgtable.c')
| -rw-r--r-- | arch/x86/mm/pgtable.c | 104 |
1 files changed, 88 insertions, 16 deletions
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 5c4ee422590e..8573b83a63d0 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c | |||
| @@ -87,7 +87,19 @@ static inline void pgd_list_del(pgd_t *pgd) | |||
| 87 | #define UNSHARED_PTRS_PER_PGD \ | 87 | #define UNSHARED_PTRS_PER_PGD \ |
| 88 | (SHARED_KERNEL_PMD ? KERNEL_PGD_BOUNDARY : PTRS_PER_PGD) | 88 | (SHARED_KERNEL_PMD ? KERNEL_PGD_BOUNDARY : PTRS_PER_PGD) |
| 89 | 89 | ||
| 90 | static void pgd_ctor(pgd_t *pgd) | 90 | |
| 91 | static void pgd_set_mm(pgd_t *pgd, struct mm_struct *mm) | ||
| 92 | { | ||
| 93 | BUILD_BUG_ON(sizeof(virt_to_page(pgd)->index) < sizeof(mm)); | ||
| 94 | virt_to_page(pgd)->index = (pgoff_t)mm; | ||
| 95 | } | ||
| 96 | |||
| 97 | struct mm_struct *pgd_page_get_mm(struct page *page) | ||
| 98 | { | ||
| 99 | return (struct mm_struct *)page->index; | ||
| 100 | } | ||
| 101 | |||
| 102 | static void pgd_ctor(struct mm_struct *mm, pgd_t *pgd) | ||
| 91 | { | 103 | { |
| 92 | /* If the pgd points to a shared pagetable level (either the | 104 | /* If the pgd points to a shared pagetable level (either the |
| 93 | ptes in non-PAE, or shared PMD in PAE), then just copy the | 105 | ptes in non-PAE, or shared PMD in PAE), then just copy the |
| @@ -98,27 +110,23 @@ static void pgd_ctor(pgd_t *pgd) | |||
| 98 | clone_pgd_range(pgd + KERNEL_PGD_BOUNDARY, | 110 | clone_pgd_range(pgd + KERNEL_PGD_BOUNDARY, |
| 99 | swapper_pg_dir + KERNEL_PGD_BOUNDARY, | 111 | swapper_pg_dir + KERNEL_PGD_BOUNDARY, |
| 100 | KERNEL_PGD_PTRS); | 112 | KERNEL_PGD_PTRS); |
| 101 | paravirt_alloc_pmd_clone(__pa(pgd) >> PAGE_SHIFT, | ||
| 102 | __pa(swapper_pg_dir) >> PAGE_SHIFT, | ||
| 103 | KERNEL_PGD_BOUNDARY, | ||
| 104 | KERNEL_PGD_PTRS); | ||
| 105 | } | 113 | } |
| 106 | 114 | ||
| 107 | /* list required to sync kernel mapping updates */ | 115 | /* list required to sync kernel mapping updates */ |
| 108 | if (!SHARED_KERNEL_PMD) | 116 | if (!SHARED_KERNEL_PMD) { |
| 117 | pgd_set_mm(pgd, mm); | ||
| 109 | pgd_list_add(pgd); | 118 | pgd_list_add(pgd); |
| 119 | } | ||
| 110 | } | 120 | } |
| 111 | 121 | ||
| 112 | static void pgd_dtor(pgd_t *pgd) | 122 | static void pgd_dtor(pgd_t *pgd) |
| 113 | { | 123 | { |
| 114 | unsigned long flags; /* can be called from interrupt context */ | ||
| 115 | |||
| 116 | if (SHARED_KERNEL_PMD) | 124 | if (SHARED_KERNEL_PMD) |
| 117 | return; | 125 | return; |
| 118 | 126 | ||
| 119 | spin_lock_irqsave(&pgd_lock, flags); | 127 | spin_lock(&pgd_lock); |
| 120 | pgd_list_del(pgd); | 128 | pgd_list_del(pgd); |
| 121 | spin_unlock_irqrestore(&pgd_lock, flags); | 129 | spin_unlock(&pgd_lock); |
| 122 | } | 130 | } |
| 123 | 131 | ||
| 124 | /* | 132 | /* |
| @@ -160,8 +168,7 @@ void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd) | |||
| 160 | * section 8.1: in PAE mode we explicitly have to flush the | 168 | * section 8.1: in PAE mode we explicitly have to flush the |
| 161 | * TLB via cr3 if the top-level pgd is changed... | 169 | * TLB via cr3 if the top-level pgd is changed... |
| 162 | */ | 170 | */ |
| 163 | if (mm == current->active_mm) | 171 | flush_tlb_mm(mm); |
| 164 | write_cr3(read_cr3()); | ||
| 165 | } | 172 | } |
| 166 | #else /* !CONFIG_X86_PAE */ | 173 | #else /* !CONFIG_X86_PAE */ |
| 167 | 174 | ||
| @@ -250,7 +257,6 @@ pgd_t *pgd_alloc(struct mm_struct *mm) | |||
| 250 | { | 257 | { |
| 251 | pgd_t *pgd; | 258 | pgd_t *pgd; |
| 252 | pmd_t *pmds[PREALLOCATED_PMDS]; | 259 | pmd_t *pmds[PREALLOCATED_PMDS]; |
| 253 | unsigned long flags; | ||
| 254 | 260 | ||
| 255 | pgd = (pgd_t *)__get_free_page(PGALLOC_GFP); | 261 | pgd = (pgd_t *)__get_free_page(PGALLOC_GFP); |
| 256 | 262 | ||
| @@ -270,12 +276,12 @@ pgd_t *pgd_alloc(struct mm_struct *mm) | |||
| 270 | * respect to anything walking the pgd_list, so that they | 276 | * respect to anything walking the pgd_list, so that they |
| 271 | * never see a partially populated pgd. | 277 | * never see a partially populated pgd. |
| 272 | */ | 278 | */ |
| 273 | spin_lock_irqsave(&pgd_lock, flags); | 279 | spin_lock(&pgd_lock); |
| 274 | 280 | ||
| 275 | pgd_ctor(pgd); | 281 | pgd_ctor(mm, pgd); |
| 276 | pgd_prepopulate_pmd(mm, pgd, pmds); | 282 | pgd_prepopulate_pmd(mm, pgd, pmds); |
| 277 | 283 | ||
| 278 | spin_unlock_irqrestore(&pgd_lock, flags); | 284 | spin_unlock(&pgd_lock); |
| 279 | 285 | ||
| 280 | return pgd; | 286 | return pgd; |
| 281 | 287 | ||
| @@ -310,6 +316,25 @@ int ptep_set_access_flags(struct vm_area_struct *vma, | |||
| 310 | return changed; | 316 | return changed; |
| 311 | } | 317 | } |
| 312 | 318 | ||
| 319 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | ||
| 320 | int pmdp_set_access_flags(struct vm_area_struct *vma, | ||
| 321 | unsigned long address, pmd_t *pmdp, | ||
| 322 | pmd_t entry, int dirty) | ||
| 323 | { | ||
| 324 | int changed = !pmd_same(*pmdp, entry); | ||
| 325 | |||
| 326 | VM_BUG_ON(address & ~HPAGE_PMD_MASK); | ||
| 327 | |||
| 328 | if (changed && dirty) { | ||
| 329 | *pmdp = entry; | ||
| 330 | pmd_update_defer(vma->vm_mm, address, pmdp); | ||
| 331 | flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE); | ||
| 332 | } | ||
| 333 | |||
| 334 | return changed; | ||
| 335 | } | ||
| 336 | #endif | ||
| 337 | |||
| 313 | int ptep_test_and_clear_young(struct vm_area_struct *vma, | 338 | int ptep_test_and_clear_young(struct vm_area_struct *vma, |
| 314 | unsigned long addr, pte_t *ptep) | 339 | unsigned long addr, pte_t *ptep) |
| 315 | { | 340 | { |
| @@ -325,6 +350,23 @@ int ptep_test_and_clear_young(struct vm_area_struct *vma, | |||
| 325 | return ret; | 350 | return ret; |
| 326 | } | 351 | } |
| 327 | 352 | ||
| 353 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | ||
| 354 | int pmdp_test_and_clear_young(struct vm_area_struct *vma, | ||
| 355 | unsigned long addr, pmd_t *pmdp) | ||
| 356 | { | ||
| 357 | int ret = 0; | ||
| 358 | |||
| 359 | if (pmd_young(*pmdp)) | ||
| 360 | ret = test_and_clear_bit(_PAGE_BIT_ACCESSED, | ||
| 361 | (unsigned long *)pmdp); | ||
| 362 | |||
| 363 | if (ret) | ||
| 364 | pmd_update(vma->vm_mm, addr, pmdp); | ||
| 365 | |||
| 366 | return ret; | ||
| 367 | } | ||
| 368 | #endif | ||
| 369 | |||
| 328 | int ptep_clear_flush_young(struct vm_area_struct *vma, | 370 | int ptep_clear_flush_young(struct vm_area_struct *vma, |
| 329 | unsigned long address, pte_t *ptep) | 371 | unsigned long address, pte_t *ptep) |
| 330 | { | 372 | { |
| @@ -337,6 +379,36 @@ int ptep_clear_flush_young(struct vm_area_struct *vma, | |||
| 337 | return young; | 379 | return young; |
| 338 | } | 380 | } |
| 339 | 381 | ||
| 382 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | ||
| 383 | int pmdp_clear_flush_young(struct vm_area_struct *vma, | ||
| 384 | unsigned long address, pmd_t *pmdp) | ||
| 385 | { | ||
| 386 | int young; | ||
| 387 | |||
| 388 | VM_BUG_ON(address & ~HPAGE_PMD_MASK); | ||
| 389 | |||
| 390 | young = pmdp_test_and_clear_young(vma, address, pmdp); | ||
| 391 | if (young) | ||
| 392 | flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE); | ||
| 393 | |||
| 394 | return young; | ||
| 395 | } | ||
| 396 | |||
| 397 | void pmdp_splitting_flush(struct vm_area_struct *vma, | ||
| 398 | unsigned long address, pmd_t *pmdp) | ||
| 399 | { | ||
| 400 | int set; | ||
| 401 | VM_BUG_ON(address & ~HPAGE_PMD_MASK); | ||
| 402 | set = !test_and_set_bit(_PAGE_BIT_SPLITTING, | ||
| 403 | (unsigned long *)pmdp); | ||
| 404 | if (set) { | ||
| 405 | pmd_update(vma->vm_mm, address, pmdp); | ||
| 406 | /* need tlb flush only to serialize against gup-fast */ | ||
| 407 | flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE); | ||
| 408 | } | ||
| 409 | } | ||
| 410 | #endif | ||
| 411 | |||
| 340 | /** | 412 | /** |
| 341 | * reserve_top_address - reserves a hole in the top of kernel address space | 413 | * reserve_top_address - reserves a hole in the top of kernel address space |
| 342 | * @reserve - size of hole to reserve | 414 | * @reserve - size of hole to reserve |
