Diffstat (limited to 'arch/x86/mm/pgtable.c')
-rw-r--r--	arch/x86/mm/pgtable.c	104
1 file changed, 88 insertions(+), 16 deletions(-)
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 5c4ee422590e..8573b83a63d0 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -87,7 +87,19 @@ static inline void pgd_list_del(pgd_t *pgd)
 #define UNSHARED_PTRS_PER_PGD \
 	(SHARED_KERNEL_PMD ? KERNEL_PGD_BOUNDARY : PTRS_PER_PGD)
 
-static void pgd_ctor(pgd_t *pgd)
+
+static void pgd_set_mm(pgd_t *pgd, struct mm_struct *mm)
+{
+	BUILD_BUG_ON(sizeof(virt_to_page(pgd)->index) < sizeof(mm));
+	virt_to_page(pgd)->index = (pgoff_t)mm;
+}
+
+struct mm_struct *pgd_page_get_mm(struct page *page)
+{
+	return (struct mm_struct *)page->index;
+}
+
+static void pgd_ctor(struct mm_struct *mm, pgd_t *pgd)
 {
 	/* If the pgd points to a shared pagetable level (either the
 	   ptes in non-PAE, or shared PMD in PAE), then just copy the
@@ -98,27 +110,23 @@ static void pgd_ctor(pgd_t *pgd)
 		clone_pgd_range(pgd + KERNEL_PGD_BOUNDARY,
 				swapper_pg_dir + KERNEL_PGD_BOUNDARY,
 				KERNEL_PGD_PTRS);
-		paravirt_alloc_pmd_clone(__pa(pgd) >> PAGE_SHIFT,
-					 __pa(swapper_pg_dir) >> PAGE_SHIFT,
-					 KERNEL_PGD_BOUNDARY,
-					 KERNEL_PGD_PTRS);
 	}
 
 	/* list required to sync kernel mapping updates */
-	if (!SHARED_KERNEL_PMD)
+	if (!SHARED_KERNEL_PMD) {
+		pgd_set_mm(pgd, mm);
 		pgd_list_add(pgd);
+	}
 }
 
 static void pgd_dtor(pgd_t *pgd)
 {
-	unsigned long flags; /* can be called from interrupt context */
-
 	if (SHARED_KERNEL_PMD)
 		return;
 
-	spin_lock_irqsave(&pgd_lock, flags);
+	spin_lock(&pgd_lock);
 	pgd_list_del(pgd);
-	spin_unlock_irqrestore(&pgd_lock, flags);
+	spin_unlock(&pgd_lock);
 }
 
 /*
@@ -160,8 +168,7 @@ void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd)
 	 * section 8.1: in PAE mode we explicitly have to flush the
 	 * TLB via cr3 if the top-level pgd is changed...
 	 */
-	if (mm == current->active_mm)
-		write_cr3(read_cr3());
+	flush_tlb_mm(mm);
 }
 #else  /* !CONFIG_X86_PAE */
 
@@ -250,7 +257,6 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
 {
 	pgd_t *pgd;
 	pmd_t *pmds[PREALLOCATED_PMDS];
-	unsigned long flags;
 
 	pgd = (pgd_t *)__get_free_page(PGALLOC_GFP);
 
@@ -270,12 +276,12 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
 	 * respect to anything walking the pgd_list, so that they
 	 * never see a partially populated pgd.
 	 */
-	spin_lock_irqsave(&pgd_lock, flags);
+	spin_lock(&pgd_lock);
 
-	pgd_ctor(pgd);
+	pgd_ctor(mm, pgd);
 	pgd_prepopulate_pmd(mm, pgd, pmds);
 
-	spin_unlock_irqrestore(&pgd_lock, flags);
+	spin_unlock(&pgd_lock);
 
 	return pgd;
 
@@ -310,6 +316,25 @@ int ptep_set_access_flags(struct vm_area_struct *vma,
 	return changed;
 }
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+int pmdp_set_access_flags(struct vm_area_struct *vma,
+			  unsigned long address, pmd_t *pmdp,
+			  pmd_t entry, int dirty)
+{
+	int changed = !pmd_same(*pmdp, entry);
+
+	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
+
+	if (changed && dirty) {
+		*pmdp = entry;
+		pmd_update_defer(vma->vm_mm, address, pmdp);
+		flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
+	}
+
+	return changed;
+}
+#endif
+
 int ptep_test_and_clear_young(struct vm_area_struct *vma,
 			      unsigned long addr, pte_t *ptep)
 {
@@ -325,6 +350,23 @@ int ptep_test_and_clear_young(struct vm_area_struct *vma,
 	return ret;
 }
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+int pmdp_test_and_clear_young(struct vm_area_struct *vma,
+			      unsigned long addr, pmd_t *pmdp)
+{
+	int ret = 0;
+
+	if (pmd_young(*pmdp))
+		ret = test_and_clear_bit(_PAGE_BIT_ACCESSED,
+					 (unsigned long *)pmdp);
+
+	if (ret)
+		pmd_update(vma->vm_mm, addr, pmdp);
+
+	return ret;
+}
+#endif
+
 int ptep_clear_flush_young(struct vm_area_struct *vma,
 			   unsigned long address, pte_t *ptep)
 {
@@ -337,6 +379,36 @@ int ptep_clear_flush_young(struct vm_area_struct *vma,
 	return young;
 }
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+int pmdp_clear_flush_young(struct vm_area_struct *vma,
+			   unsigned long address, pmd_t *pmdp)
+{
+	int young;
+
+	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
+
+	young = pmdp_test_and_clear_young(vma, address, pmdp);
+	if (young)
+		flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
+
+	return young;
+}
+
+void pmdp_splitting_flush(struct vm_area_struct *vma,
+			  unsigned long address, pmd_t *pmdp)
+{
+	int set;
+	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
+	set = !test_and_set_bit(_PAGE_BIT_SPLITTING,
+				(unsigned long *)pmdp);
+	if (set) {
+		pmd_update(vma->vm_mm, address, pmdp);
+		/* need tlb flush only to serialize against gup-fast */
+		flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
+	}
+}
+#endif
+
 /**
  * reserve_top_address - reserves a hole in the top of kernel address space
  * @reserve - size of hole to reserve