Diffstat (limited to 'arch/x86/mm/pgtable_32.c')
-rw-r--r--	arch/x86/mm/pgtable_32.c	145
1 file changed, 74 insertions(+), 71 deletions(-)
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c
index be61a1d845a4..2ae5999a795a 100644
--- a/arch/x86/mm/pgtable_32.c
+++ b/arch/x86/mm/pgtable_32.c
@@ -195,11 +195,6 @@ struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
 	return pte;
 }
 
-void pmd_ctor(struct kmem_cache *cache, void *pmd)
-{
-	memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t));
-}
-
 /*
  * List of all pgd's needed for non-PAE so it can invalidate entries
  * in both cached and uncached pgd's; not needed for PAE since the
@@ -210,27 +205,18 @@ void pmd_ctor(struct kmem_cache *cache, void *pmd)
  * vmalloc faults work because attached pagetables are never freed.
  * -- wli
  */
-DEFINE_SPINLOCK(pgd_lock);
-struct page *pgd_list;
-
 static inline void pgd_list_add(pgd_t *pgd)
 {
 	struct page *page = virt_to_page(pgd);
-	page->index = (unsigned long)pgd_list;
-	if (pgd_list)
-		set_page_private(pgd_list, (unsigned long)&page->index);
-	pgd_list = page;
-	set_page_private(page, (unsigned long)&pgd_list);
+
+	list_add(&page->lru, &pgd_list);
 }
 
 static inline void pgd_list_del(pgd_t *pgd)
 {
-	struct page *next, **pprev, *page = virt_to_page(pgd);
-	next = (struct page *)page->index;
-	pprev = (struct page **)page_private(page);
-	*pprev = next;
-	if (next)
-		set_page_private(next, (unsigned long)pprev);
+	struct page *page = virt_to_page(pgd);
+
+	list_del(&page->lru);
 }
 
 
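The hunk above drops the hand-rolled chain threaded through page->index and page_private() in favour of the kernel's generic embedded list node, page->lru, so insertion and removal collapse to list_add()/list_del(). A minimal userspace sketch of the same intrusive-list pattern, mirroring the semantics of those helpers; struct my_page and the helper bodies here are illustrative stand-ins, not kernel code:

/*
 * Intrusive-list sketch: the node lives inside the tracked object,
 * and two generic helpers replace the open-coded next/pprev juggling.
 */
#include <stdio.h>
#include <stddef.h>

struct list_head {
	struct list_head *next, *prev;
};

#define LIST_HEAD_INIT(name) { &(name), &(name) }

static void list_add(struct list_head *new, struct list_head *head)
{
	new->next = head->next;
	new->prev = head;
	head->next->prev = new;
	head->next = new;
}

static void list_del(struct list_head *entry)
{
	entry->prev->next = entry->next;
	entry->next->prev = entry->prev;
}

struct my_page {			/* stands in for struct page */
	int id;
	struct list_head lru;		/* embedded node, like page->lru */
};

static struct list_head pgd_list = LIST_HEAD_INIT(pgd_list);

int main(void)
{
	struct my_page a = { .id = 1 }, b = { .id = 2 };
	struct list_head *pos;

	list_add(&a.lru, &pgd_list);	/* pgd_list_add() equivalent */
	list_add(&b.lru, &pgd_list);
	list_del(&a.lru);		/* pgd_list_del() equivalent */

	for (pos = pgd_list.next; pos != &pgd_list; pos = pos->next) {
		struct my_page *p = (struct my_page *)
			((char *)pos - offsetof(struct my_page, lru));
		printf("page %d is on pgd_list\n", p->id);
	}
	return 0;
}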
@@ -285,7 +271,6 @@ static void pgd_dtor(void *pgd)
 	if (SHARED_KERNEL_PMD)
 		return;
 
-	paravirt_release_pd(__pa(pgd) >> PAGE_SHIFT);
 	spin_lock_irqsave(&pgd_lock, flags);
 	pgd_list_del(pgd);
 	spin_unlock_irqrestore(&pgd_lock, flags);
@@ -294,77 +279,96 @@ static void pgd_dtor(void *pgd)
 #define UNSHARED_PTRS_PER_PGD \
 	(SHARED_KERNEL_PMD ? USER_PTRS_PER_PGD : PTRS_PER_PGD)
 
-/* If we allocate a pmd for part of the kernel address space, then
-   make sure its initialized with the appropriate kernel mappings.
-   Otherwise use a cached zeroed pmd.  */
-static pmd_t *pmd_cache_alloc(int idx)
-{
-	pmd_t *pmd;
-
-	if (idx >= USER_PTRS_PER_PGD) {
-		pmd = (pmd_t *)__get_free_page(GFP_KERNEL);
-
-		if (pmd)
-			memcpy(pmd,
-			       (void *)pgd_page_vaddr(swapper_pg_dir[idx]),
+#ifdef CONFIG_X86_PAE
+/*
+ * Mop up any pmd pages which may still be attached to the pgd.
+ * Normally they will be freed by munmap/exit_mmap, but any pmd we
+ * preallocate which never got a corresponding vma will need to be
+ * freed manually.
+ */
+static void pgd_mop_up_pmds(pgd_t *pgdp)
+{
+	int i;
+
+	for(i = 0; i < UNSHARED_PTRS_PER_PGD; i++) {
+		pgd_t pgd = pgdp[i];
+
+		if (pgd_val(pgd) != 0) {
+			pmd_t *pmd = (pmd_t *)pgd_page_vaddr(pgd);
+
+			pgdp[i] = native_make_pgd(0);
+
+			paravirt_release_pd(pgd_val(pgd) >> PAGE_SHIFT);
+			pmd_free(pmd);
+		}
+	}
+}
+
+/*
+ * In PAE mode, we need to do a cr3 reload (=tlb flush) when
+ * updating the top-level pagetable entries to guarantee the
+ * processor notices the update.  Since this is expensive, and
+ * all 4 top-level entries are used almost immediately in a
+ * new process's life, we just pre-populate them here.
+ *
+ * Also, if we're in a paravirt environment where the kernel pmd is
+ * not shared between pagetables (!SHARED_KERNEL_PMDS), we allocate
+ * and initialize the kernel pmds here.
+ */
+static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd)
+{
+	pud_t *pud;
+	unsigned long addr;
+	int i;
+
+	pud = pud_offset(pgd, 0);
+	for (addr = i = 0; i < UNSHARED_PTRS_PER_PGD;
+	     i++, pud++, addr += PUD_SIZE) {
+		pmd_t *pmd = pmd_alloc_one(mm, addr);
+
+		if (!pmd) {
+			pgd_mop_up_pmds(pgd);
+			return 0;
+		}
+
+		if (i >= USER_PTRS_PER_PGD)
+			memcpy(pmd, (pmd_t *)pgd_page_vaddr(swapper_pg_dir[i]),
 			       sizeof(pmd_t) * PTRS_PER_PMD);
-	} else
-		pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL);
 
-	return pmd;
+		pud_populate(mm, pud, pmd);
+	}
+
+	return 1;
+}
+#else  /* !CONFIG_X86_PAE */
+/* No need to prepopulate any pagetable entries in non-PAE modes. */
+static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd)
+{
+	return 1;
 }
 
-static void pmd_cache_free(pmd_t *pmd, int idx)
+static void pgd_mop_up_pmds(pgd_t *pgd)
 {
-	if (idx >= USER_PTRS_PER_PGD)
-		free_page((unsigned long)pmd);
-	else
-		kmem_cache_free(pmd_cache, pmd);
 }
+#endif	/* CONFIG_X86_PAE */
 
 pgd_t *pgd_alloc(struct mm_struct *mm)
 {
-	int i;
 	pgd_t *pgd = quicklist_alloc(0, GFP_KERNEL, pgd_ctor);
 
-	if (PTRS_PER_PMD == 1 || !pgd)
-		return pgd;
+	mm->pgd = pgd;		/* so that alloc_pd can use it */
 
-	for (i = 0; i < UNSHARED_PTRS_PER_PGD; ++i) {
-		pmd_t *pmd = pmd_cache_alloc(i);
-
-		if (!pmd)
-			goto out_oom;
-
-		paravirt_alloc_pd(__pa(pmd) >> PAGE_SHIFT);
-		set_pgd(&pgd[i], __pgd(1 + __pa(pmd)));
+	if (pgd && !pgd_prepopulate_pmd(mm, pgd)) {
+		quicklist_free(0, pgd_dtor, pgd);
+		pgd = NULL;
 	}
-	return pgd;
 
-out_oom:
-	for (i--; i >= 0; i--) {
-		pgd_t pgdent = pgd[i];
-		void* pmd = (void *)__va(pgd_val(pgdent)-1);
-		paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT);
-		pmd_cache_free(pmd, i);
-	}
-	quicklist_free(0, pgd_dtor, pgd);
-	return NULL;
+	return pgd;
 }
 
 void pgd_free(pgd_t *pgd)
 {
-	int i;
-
-	/* in the PAE case user pgd entries are overwritten before usage */
-	if (PTRS_PER_PMD > 1)
-		for (i = 0; i < UNSHARED_PTRS_PER_PGD; ++i) {
-			pgd_t pgdent = pgd[i];
-			void* pmd = (void *)__va(pgd_val(pgdent)-1);
-			paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT);
-			pmd_cache_free(pmd, i);
-		}
-	/* in the non-PAE case, free_pgtables() clears user pgd entries */
+	pgd_mop_up_pmds(pgd);
 	quicklist_free(0, pgd_dtor, pgd);
 }
 
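The PAE-only pair in the hunk above follows an allocate-everything-or-unwind discipline: pgd_prepopulate_pmd() installs a pmd for every top-level slot up front (copying the kernel mappings for slots at or above USER_PTRS_PER_PGD), and on any failure pgd_mop_up_pmds() frees whatever was already attached; pgd_free() reuses the same mop-up for pmds that never got a vma. A compact userspace sketch of that rollback pattern, with a plain pointer array standing in for the pgd; all names below are illustrative, not the kernel's:

/*
 * Prepopulate/mop-up rollback sketch: table[] stands in for the pgd's
 * top-level slots, calloc() for pmd_alloc_one(), and NENTRIES mirrors
 * PAE's four top-level entries.
 */
#include <stdio.h>
#include <stdlib.h>

#define NENTRIES 4

static void *table[NENTRIES];

static void mop_up(void)		/* pgd_mop_up_pmds() analogue */
{
	int i;

	for (i = 0; i < NENTRIES; i++) {
		free(table[i]);		/* free(NULL) is a no-op */
		table[i] = NULL;
	}
}

static int prepopulate(void)		/* pgd_prepopulate_pmd() analogue */
{
	int i;

	for (i = 0; i < NENTRIES; i++) {
		table[i] = calloc(1, 4096);	/* zeroed "pmd page" */
		if (!table[i]) {
			mop_up();	/* unwind the partial allocation */
			return 0;	/* same 0-on-failure convention */
		}
	}
	return 1;
}

int main(void)
{
	if (!prepopulate())
		return 1;
	printf("all %d top-level entries populated\n", NENTRIES);
	mop_up();			/* pgd_free() path */
	return 0;
}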
@@ -372,4 +376,3 @@ void check_pgt_cache(void)
 {
 	quicklist_trim(0, pgd_dtor, 25, 16);
 }
-