Diffstat (limited to 'arch/x86/mm/pgtable_32.c')
-rw-r--r--  arch/x86/mm/pgtable_32.c  145
1 file changed, 74 insertions(+), 71 deletions(-)
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c
index be61a1d845a4..2ae5999a795a 100644
--- a/arch/x86/mm/pgtable_32.c
+++ b/arch/x86/mm/pgtable_32.c
@@ -195,11 +195,6 @@ struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
 	return pte;
 }
 
-void pmd_ctor(struct kmem_cache *cache, void *pmd)
-{
-	memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t));
-}
-
 /*
  * List of all pgd's needed for non-PAE so it can invalidate entries
  * in both cached and uncached pgd's; not needed for PAE since the
@@ -210,27 +205,18 @@ void pmd_ctor(struct kmem_cache *cache, void *pmd)
  * vmalloc faults work because attached pagetables are never freed.
  * -- wli
  */
-DEFINE_SPINLOCK(pgd_lock);
-struct page *pgd_list;
-
 static inline void pgd_list_add(pgd_t *pgd)
 {
 	struct page *page = virt_to_page(pgd);
-	page->index = (unsigned long)pgd_list;
-	if (pgd_list)
-		set_page_private(pgd_list, (unsigned long)&page->index);
-	pgd_list = page;
-	set_page_private(page, (unsigned long)&pgd_list);
+
+	list_add(&page->lru, &pgd_list);
 }
 
 static inline void pgd_list_del(pgd_t *pgd)
 {
-	struct page *next, **pprev, *page = virt_to_page(pgd);
-	next = (struct page *)page->index;
-	pprev = (struct page **)page_private(page);
-	*pprev = next;
-	if (next)
-		set_page_private(next, (unsigned long)pprev);
+	struct page *page = virt_to_page(pgd);
+
+	list_del(&page->lru);
 }
 
 
@@ -285,7 +271,6 @@ static void pgd_dtor(void *pgd)
 	if (SHARED_KERNEL_PMD)
 		return;
 
-	paravirt_release_pd(__pa(pgd) >> PAGE_SHIFT);
 	spin_lock_irqsave(&pgd_lock, flags);
 	pgd_list_del(pgd);
 	spin_unlock_irqrestore(&pgd_lock, flags);
@@ -294,77 +279,96 @@ static void pgd_dtor(void *pgd)
 #define UNSHARED_PTRS_PER_PGD \
 	(SHARED_KERNEL_PMD ? USER_PTRS_PER_PGD : PTRS_PER_PGD)
 
-/* If we allocate a pmd for part of the kernel address space, then
-   make sure its initialized with the appropriate kernel mappings.
-   Otherwise use a cached zeroed pmd.  */
-static pmd_t *pmd_cache_alloc(int idx)
+#ifdef CONFIG_X86_PAE
+/*
+ * Mop up any pmd pages which may still be attached to the pgd.
+ * Normally they will be freed by munmap/exit_mmap, but any pmd we
+ * preallocate which never got a corresponding vma will need to be
+ * freed manually.
+ */
+static void pgd_mop_up_pmds(pgd_t *pgdp)
 {
-	pmd_t *pmd;
+	int i;
 
-	if (idx >= USER_PTRS_PER_PGD) {
-		pmd = (pmd_t *)__get_free_page(GFP_KERNEL);
+	for(i = 0; i < UNSHARED_PTRS_PER_PGD; i++) {
+		pgd_t pgd = pgdp[i];
 
-		if (pmd)
-			memcpy(pmd,
-			       (void *)pgd_page_vaddr(swapper_pg_dir[idx]),
+		if (pgd_val(pgd) != 0) {
+			pmd_t *pmd = (pmd_t *)pgd_page_vaddr(pgd);
+
+			pgdp[i] = native_make_pgd(0);
+
+			paravirt_release_pd(pgd_val(pgd) >> PAGE_SHIFT);
+			pmd_free(pmd);
+		}
+	}
+}
+
+/*
+ * In PAE mode, we need to do a cr3 reload (=tlb flush) when
+ * updating the top-level pagetable entries to guarantee the
+ * processor notices the update.  Since this is expensive, and
+ * all 4 top-level entries are used almost immediately in a
+ * new process's life, we just pre-populate them here.
+ *
+ * Also, if we're in a paravirt environment where the kernel pmd is
+ * not shared between pagetables (!SHARED_KERNEL_PMDS), we allocate
+ * and initialize the kernel pmds here.
+ */
+static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd)
+{
+	pud_t *pud;
+	unsigned long addr;
+	int i;
+
+	pud = pud_offset(pgd, 0);
+	for (addr = i = 0; i < UNSHARED_PTRS_PER_PGD;
+	     i++, pud++, addr += PUD_SIZE) {
+		pmd_t *pmd = pmd_alloc_one(mm, addr);
+
+		if (!pmd) {
+			pgd_mop_up_pmds(pgd);
+			return 0;
+		}
+
+		if (i >= USER_PTRS_PER_PGD)
+			memcpy(pmd, (pmd_t *)pgd_page_vaddr(swapper_pg_dir[i]),
 			       sizeof(pmd_t) * PTRS_PER_PMD);
-	} else
-		pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL);
 
-	return pmd;
+		pud_populate(mm, pud, pmd);
+	}
+
+	return 1;
+}
+#else	/* !CONFIG_X86_PAE */
+/* No need to prepopulate any pagetable entries in non-PAE modes. */
+static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd)
+{
+	return 1;
 }
 
-static void pmd_cache_free(pmd_t *pmd, int idx)
+static void pgd_mop_up_pmds(pgd_t *pgd)
 {
-	if (idx >= USER_PTRS_PER_PGD)
-		free_page((unsigned long)pmd);
-	else
-		kmem_cache_free(pmd_cache, pmd);
 }
+#endif	/* CONFIG_X86_PAE */
 
 pgd_t *pgd_alloc(struct mm_struct *mm)
 {
-	int i;
 	pgd_t *pgd = quicklist_alloc(0, GFP_KERNEL, pgd_ctor);
 
-	if (PTRS_PER_PMD == 1 || !pgd)
-		return pgd;
+	mm->pgd = pgd;		/* so that alloc_pd can use it */
 
-	for (i = 0; i < UNSHARED_PTRS_PER_PGD; ++i) {
-		pmd_t *pmd = pmd_cache_alloc(i);
-
-		if (!pmd)
-			goto out_oom;
-
-		paravirt_alloc_pd(__pa(pmd) >> PAGE_SHIFT);
-		set_pgd(&pgd[i], __pgd(1 + __pa(pmd)));
+	if (pgd && !pgd_prepopulate_pmd(mm, pgd)) {
+		quicklist_free(0, pgd_dtor, pgd);
+		pgd = NULL;
 	}
-	return pgd;
 
-out_oom:
-	for (i--; i >= 0; i--) {
-		pgd_t pgdent = pgd[i];
-		void* pmd = (void *)__va(pgd_val(pgdent)-1);
-		paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT);
-		pmd_cache_free(pmd, i);
-	}
-	quicklist_free(0, pgd_dtor, pgd);
-	return NULL;
+	return pgd;
 }
 
 void pgd_free(pgd_t *pgd)
 {
-	int i;
-
-	/* in the PAE case user pgd entries are overwritten before usage */
-	if (PTRS_PER_PMD > 1)
-		for (i = 0; i < UNSHARED_PTRS_PER_PGD; ++i) {
-			pgd_t pgdent = pgd[i];
-			void* pmd = (void *)__va(pgd_val(pgdent)-1);
-			paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT);
-			pmd_cache_free(pmd, i);
-		}
-	/* in the non-PAE case, free_pgtables() clears user pgd entries */
+	pgd_mop_up_pmds(pgd);
 	quicklist_free(0, pgd_dtor, pgd);
 }
 
@@ -372,4 +376,3 @@ void check_pgt_cache(void)
 {
 	quicklist_trim(0, pgd_dtor, 25, 16);
 }
-
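
Reader's note: the pgd_list_add()/pgd_list_del() hunk above replaces hand-rolled pointer chaining through page->index and page_private() with an embedded list node (page->lru) and the generic list_add()/list_del() helpers. The stand-alone sketch below mimics that bookkeeping in user space so the before/after can be compiled and stepped through. Everything in it is a local stand-in written for illustration only: struct fake_page, the list helpers, and the printed walk are not the kernel's struct page or <linux/list.h>.

/*
 * Minimal user-space sketch of the list_head-style pgd_list bookkeeping.
 * Local reimplementation for illustration; not kernel code.
 */
#include <stdio.h>
#include <stddef.h>

struct list_head {
	struct list_head *next, *prev;
};

#define LIST_HEAD_INIT(name) { &(name), &(name) }

static void list_add(struct list_head *new, struct list_head *head)
{
	/* Insert "new" right after "head", fixing up both neighbours. */
	new->next = head->next;
	new->prev = head;
	head->next->prev = new;
	head->next = new;
}

static void list_del(struct list_head *entry)
{
	/* Unlink "entry"; no special case for the list head is needed. */
	entry->prev->next = entry->next;
	entry->next->prev = entry->prev;
	entry->next = entry->prev = NULL;
}

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

/* Stand-in for struct page; only an id and the embedded node matter here. */
struct fake_page {
	int id;
	struct list_head lru;
};

static struct list_head pgd_list = LIST_HEAD_INIT(pgd_list);

int main(void)
{
	struct fake_page a = { .id = 1 }, b = { .id = 2 }, c = { .id = 3 };
	struct list_head *pos;

	/* pgd_list_add() in the patch boils down to one call per pgd page. */
	list_add(&a.lru, &pgd_list);
	list_add(&b.lru, &pgd_list);
	list_add(&c.lru, &pgd_list);

	/* pgd_list_del() becomes a single unlink. */
	list_del(&b.lru);

	/* Walk the remaining entries the way a pgd_list user would. */
	for (pos = pgd_list.next; pos != &pgd_list; pos = pos->next)
		printf("page %d is on pgd_list\n",
		       container_of(pos, struct fake_page, lru)->id);

	return 0;
}

The payoff visible in the diff is that deletion no longer needs back-pointer fix-ups via set_page_private() on neighbouring pages, and any code that walks pgd_list can use the standard list iteration helpers. The new pgd_list head and pgd_lock are presumably defined elsewhere in the full commit; this diffstat is limited to arch/x86/mm/pgtable_32.c, and pgd_lock is still used in pgd_dtor() even though its definition disappears from this file.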