author	Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>	2013-06-20 05:00:15 -0400
committer	Benjamin Herrenschmidt <benh@kernel.crashing.org>	2013-06-21 02:01:53 -0400
commit	074c2eae3e9b66c03a17a12df8f2cd19382b68ab (patch)
tree	1606baa1df8093b5ca0ef638236b2ace837e1d98
parent	f940f5289873af2ad2c4e73f88c24ad2b8fe3f87 (diff)
powerpc/THP: Implement transparent hugepages for ppc64
We now have pmd entries covering a 16MB range, and the PMD table doubles its original size. We use the second half of the PMD table to deposit the pgtable (PTE page). The deposited PTE page is further used to track the HPTE information. That information is encoded as [ secondary group | 3 bit hidx | valid ], using one byte per HPTE entry. With a 16MB hugepage and 64K HPTEs we need 256 entries, and with 4K HPTEs we need 4096 entries; both fit in a 4K PTE page.

On hugepage invalidate we need to walk the PTE page and invalidate all valid HPTEs.

This patch implements the necessary arch-specific functions for THP support and also the hugepage invalidate logic. These PMD-related functions are intentionally kept similar to their PTE counterparts.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
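For illustration only (not part of the patch): a small standalone C sketch of the arithmetic and the per-entry byte layout described above. The 16MB hugepage size, the 64K/4K base page sizes and the [ secondary | 3 bit hidx | valid ] field positions follow the commit message; the variable names and the example hidx value are ours.

#include <stdio.h>

int main(void)
{
	unsigned long hugepage = 16UL << 20;	/* one 16MB PMD mapping */

	/* one tracking byte per HPTE, so either case fits in a 4K PTE page */
	printf("64K base pages: %lu entries\n", hugepage / (64UL << 10));	/* 256  */
	printf("4K base pages:  %lu entries\n", hugepage / (4UL << 10));	/* 4096 */

	/* assumed byte layout: [ secondary | 3-bit hidx | valid | 000 ] */
	unsigned char slot = (1u << 7) | (5u << 4) | (1u << 3);
	printf("secondary=%u hidx=%u valid=%u\n",
	       (slot >> 7) & 0x1, (slot >> 4) & 0x7, (slot >> 3) & 0x1);
	return 0;
}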
-rw-r--r--  arch/powerpc/include/asm/pgtable-ppc64.h  | 215
-rw-r--r--  arch/powerpc/include/asm/pgtable.h        |   4
-rw-r--r--  arch/powerpc/include/asm/tlbflush.h       |   3
-rw-r--r--  arch/powerpc/mm/pgtable_64.c              | 377
-rw-r--r--  arch/powerpc/mm/tlb_hash64.c              |  27
-rw-r--r--  arch/powerpc/platforms/Kconfig.cputype    |   1
6 files changed, 625 insertions, 2 deletions
diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h b/arch/powerpc/include/asm/pgtable-ppc64.h
index ab843328b47f..8f9da5e32fea 100644
--- a/arch/powerpc/include/asm/pgtable-ppc64.h
+++ b/arch/powerpc/include/asm/pgtable-ppc64.h
@@ -10,6 +10,7 @@
 #else
 #include <asm/pgtable-ppc64-4k.h>
 #endif
+#include <asm/barrier.h>
 
 #define FIRST_USER_ADDRESS	0
 
@@ -154,7 +155,7 @@
 #define pmd_present(pmd)	(pmd_val(pmd) != 0)
 #define pmd_clear(pmdp)		(pmd_val(*(pmdp)) = 0)
 #define pmd_page_vaddr(pmd)	(pmd_val(pmd) & ~PMD_MASKED_BITS)
-#define pmd_page(pmd)		virt_to_page(pmd_page_vaddr(pmd))
+extern struct page *pmd_page(pmd_t pmd);
 
 #define pud_set(pudp, pudval)	(pud_val(*(pudp)) = (pudval))
 #define pud_none(pud)		(!pud_val(pud))
@@ -382,4 +383,216 @@ static inline pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea,
 
 #endif /* __ASSEMBLY__ */
 
+/*
+ * THP pages can't be special. So use the _PAGE_SPECIAL
+ */
+#define _PAGE_SPLITTING _PAGE_SPECIAL
+
+/*
+ * We need to differentiate between explicit huge page and THP huge
+ * page, since a THP huge page also needs to track real subpage details
+ */
+#define _PAGE_THP_HUGE  _PAGE_4K_PFN
+
+/*
+ * set of bits not changed in pmd_modify.
+ */
+#define _HPAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS | \
+			 _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_SPLITTING | \
+			 _PAGE_THP_HUGE)
+
+#ifndef __ASSEMBLY__
+/*
+ * The linux hugepage PMD now includes the pmd entries followed by the address
+ * to the stashed pgtable_t. The stashed pgtable_t contains the hpte bits.
+ * [ 1 bit secondary | 3 bit hidx | 1 bit valid | 000]. We use one byte per
+ * each HPTE entry. With 16MB hugepage and 64K HPTE we need 256 entries and
+ * with 4K HPTE we need 4096 entries. Both will fit in a 4K pgtable_t.
+ *
+ * The last three bits are intentionally left as zero. These memory locations
+ * are also used as normal page PTE pointers. So if we have any pointers
+ * left around while we collapse a hugepage, we need to make sure
+ * the _PAGE_PRESENT and _PAGE_FILE bits of those are zero when we look at them.
+ */
+static inline unsigned int hpte_valid(unsigned char *hpte_slot_array, int index)
+{
+	return (hpte_slot_array[index] >> 3) & 0x1;
+}
+
+static inline unsigned int hpte_hash_index(unsigned char *hpte_slot_array,
+					   int index)
+{
+	return hpte_slot_array[index] >> 4;
+}
+
+static inline void mark_hpte_slot_valid(unsigned char *hpte_slot_array,
+					unsigned int index, unsigned int hidx)
+{
+	hpte_slot_array[index] = hidx << 4 | 0x1 << 3;
+}
+
+static inline char *get_hpte_slot_array(pmd_t *pmdp)
+{
+	/*
+	 * The hpte hindex is stored in the pgtable whose address is in the
+	 * second half of the PMD
+	 *
+	 * Order this load with the test for pmd_trans_huge in the caller
+	 */
+	smp_rmb();
+	return *(char **)(pmdp + PTRS_PER_PMD);
+
+
+}
+
+extern void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr,
+				   pmd_t *pmdp);
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+extern pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot);
+extern pmd_t mk_pmd(struct page *page, pgprot_t pgprot);
+extern pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot);
+extern void set_pmd_at(struct mm_struct *mm, unsigned long addr,
+		       pmd_t *pmdp, pmd_t pmd);
+extern void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
+				 pmd_t *pmd);
+
+static inline int pmd_trans_huge(pmd_t pmd)
+{
+	/*
+	 * leaf pte for huge page, bottom two bits != 00
+	 */
+	return (pmd_val(pmd) & 0x3) && (pmd_val(pmd) & _PAGE_THP_HUGE);
+}
+
+static inline int pmd_large(pmd_t pmd)
+{
+	/*
+	 * leaf pte for huge page, bottom two bits != 00
+	 */
+	if (pmd_trans_huge(pmd))
+		return pmd_val(pmd) & _PAGE_PRESENT;
+	return 0;
+}
+
+static inline int pmd_trans_splitting(pmd_t pmd)
+{
+	if (pmd_trans_huge(pmd))
+		return pmd_val(pmd) & _PAGE_SPLITTING;
+	return 0;
+}
+
+/* We will enable it in the last patch */
+#define has_transparent_hugepage() 0
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
+static inline pte_t pmd_pte(pmd_t pmd)
+{
+	return __pte(pmd_val(pmd));
+}
+
+static inline pmd_t pte_pmd(pte_t pte)
+{
+	return __pmd(pte_val(pte));
+}
+
+static inline pte_t *pmdp_ptep(pmd_t *pmd)
+{
+	return (pte_t *)pmd;
+}
+
+#define pmd_pfn(pmd)		pte_pfn(pmd_pte(pmd))
+#define pmd_young(pmd)		pte_young(pmd_pte(pmd))
+#define pmd_mkold(pmd)		pte_pmd(pte_mkold(pmd_pte(pmd)))
+#define pmd_wrprotect(pmd)	pte_pmd(pte_wrprotect(pmd_pte(pmd)))
+#define pmd_mkdirty(pmd)	pte_pmd(pte_mkdirty(pmd_pte(pmd)))
+#define pmd_mkyoung(pmd)	pte_pmd(pte_mkyoung(pmd_pte(pmd)))
+#define pmd_mkwrite(pmd)	pte_pmd(pte_mkwrite(pmd_pte(pmd)))
+
+#define __HAVE_ARCH_PMD_WRITE
+#define pmd_write(pmd)		pte_write(pmd_pte(pmd))
+
+static inline pmd_t pmd_mkhuge(pmd_t pmd)
+{
+	/* Do nothing, mk_pmd() does this part. */
+	return pmd;
+}
+
+static inline pmd_t pmd_mknotpresent(pmd_t pmd)
+{
+	pmd_val(pmd) &= ~_PAGE_PRESENT;
+	return pmd;
+}
+
+static inline pmd_t pmd_mksplitting(pmd_t pmd)
+{
+	pmd_val(pmd) |= _PAGE_SPLITTING;
+	return pmd;
+}
+
+#define __HAVE_ARCH_PMD_SAME
+static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
+{
+	return (((pmd_val(pmd_a) ^ pmd_val(pmd_b)) & ~_PAGE_HPTEFLAGS) == 0);
+}
+
+#define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS
+extern int pmdp_set_access_flags(struct vm_area_struct *vma,
+				 unsigned long address, pmd_t *pmdp,
+				 pmd_t entry, int dirty);
+
+extern unsigned long pmd_hugepage_update(struct mm_struct *mm,
+					 unsigned long addr,
+					 pmd_t *pmdp, unsigned long clr);
+
+static inline int __pmdp_test_and_clear_young(struct mm_struct *mm,
+					      unsigned long addr, pmd_t *pmdp)
+{
+	unsigned long old;
+
+	if ((pmd_val(*pmdp) & (_PAGE_ACCESSED | _PAGE_HASHPTE)) == 0)
+		return 0;
+	old = pmd_hugepage_update(mm, addr, pmdp, _PAGE_ACCESSED);
+	return ((old & _PAGE_ACCESSED) != 0);
+}
+
+#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
+extern int pmdp_test_and_clear_young(struct vm_area_struct *vma,
+				     unsigned long address, pmd_t *pmdp);
+#define __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH
+extern int pmdp_clear_flush_young(struct vm_area_struct *vma,
+				  unsigned long address, pmd_t *pmdp);
+
+#define __HAVE_ARCH_PMDP_GET_AND_CLEAR
+extern pmd_t pmdp_get_and_clear(struct mm_struct *mm,
+				unsigned long addr, pmd_t *pmdp);
+
+#define __HAVE_ARCH_PMDP_CLEAR_FLUSH
+extern pmd_t pmdp_clear_flush(struct vm_area_struct *vma, unsigned long address,
+			      pmd_t *pmdp);
+
+#define __HAVE_ARCH_PMDP_SET_WRPROTECT
+static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long addr,
+				      pmd_t *pmdp)
+{
+
+	if ((pmd_val(*pmdp) & _PAGE_RW) == 0)
+		return;
+
+	pmd_hugepage_update(mm, addr, pmdp, _PAGE_RW);
+}
+
+#define __HAVE_ARCH_PMDP_SPLITTING_FLUSH
+extern void pmdp_splitting_flush(struct vm_area_struct *vma,
+				 unsigned long address, pmd_t *pmdp);
+
+#define __HAVE_ARCH_PGTABLE_DEPOSIT
+extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
+				       pgtable_t pgtable);
+#define __HAVE_ARCH_PGTABLE_WITHDRAW
+extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
+
+#define __HAVE_ARCH_PMDP_INVALIDATE
+extern void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
+			    pmd_t *pmdp);
+#endif /* __ASSEMBLY__ */
 #endif /* _ASM_POWERPC_PGTABLE_PPC64_H_ */
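A rough usage sketch (editorial, not from the patch) of the accessors added above, the way a hash-fault path might record and later read back the per-HPTE slot byte for one base page of a THP mapping. The function name and the 'index', 'slot' and 'secondary' parameters are hypothetical; only get_hpte_slot_array(), mark_hpte_slot_valid(), hpte_valid() and hpte_hash_index() come from the patch.

/* Hypothetical helper: remember where base page 'index' of the hugepage
 * was inserted, then read it back as hpte_do_hugepage_flush() would. */
static void thp_slot_example(pmd_t *pmdp, unsigned int index,
			     unsigned long slot, int secondary)
{
	unsigned char *hpte_slot_array;
	unsigned int hidx;

	hpte_slot_array = (unsigned char *)get_hpte_slot_array(pmdp);

	/* 4-bit hidx: secondary bit on top of the 3-bit group offset */
	hidx = (secondary ? _PTEIDX_SECONDARY : 0) | (slot & _PTEIDX_GROUP_IX);
	mark_hpte_slot_valid(hpte_slot_array, index, hidx);

	/* later, on invalidate, only valid entries are looked up */
	if (hpte_valid(hpte_slot_array, index))
		hidx = hpte_hash_index(hpte_slot_array, index);
}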
diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h
index 7aeb9555f6ea..d53db937ec75 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -220,6 +220,10 @@ extern int gup_hugepd(hugepd_t *hugepd, unsigned pdshift, unsigned long addr,
 
 extern int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
		       unsigned long end, int write, struct page **pages, int *nr);
+#ifndef CONFIG_TRANSPARENT_HUGEPAGE
+#define pmd_large(pmd)	0
+#define has_transparent_hugepage() 0
+#endif
 #endif /* __ASSEMBLY__ */
 
 #endif /* __KERNEL__ */
diff --git a/arch/powerpc/include/asm/tlbflush.h b/arch/powerpc/include/asm/tlbflush.h
index 61a59271665b..2def01ed0cb2 100644
--- a/arch/powerpc/include/asm/tlbflush.h
+++ b/arch/powerpc/include/asm/tlbflush.h
@@ -165,7 +165,8 @@ static inline void flush_tlb_kernel_range(unsigned long start,
 /* Private function for use by PCI IO mapping code */
 extern void __flush_hash_table_range(struct mm_struct *mm, unsigned long start,
				      unsigned long end);
-
+extern void flush_tlb_pmd_range(struct mm_struct *mm, pmd_t *pmd,
+				unsigned long addr);
 #else
 #error Unsupported MMU type
 #endif
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index a854096e1023..e4d3e9fb59be 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -338,6 +338,19 @@ EXPORT_SYMBOL(iounmap);
 EXPORT_SYMBOL(__iounmap);
 EXPORT_SYMBOL(__iounmap_at);
 
+/*
+ * For hugepage we have pfn in the pmd, we use PTE_RPN_SHIFT bits for flags
+ * For PTE page, we have a PTE_FRAG_SIZE (4K) aligned virtual address.
+ */
+struct page *pmd_page(pmd_t pmd)
+{
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+	if (pmd_trans_huge(pmd))
+		return pfn_to_page(pmd_pfn(pmd));
+#endif
+	return virt_to_page(pmd_page_vaddr(pmd));
+}
+
 #ifdef CONFIG_PPC_64K_PAGES
 static pte_t *get_from_cache(struct mm_struct *mm)
 {
@@ -455,3 +468,367 @@ void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
 }
 #endif
 #endif /* CONFIG_PPC_64K_PAGES */
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+
+/*
+ * This is called when relaxing access to a hugepage. It's also called in the page
+ * fault path when we don't hit any of the major fault cases, ie, a minor
+ * update of _PAGE_ACCESSED, _PAGE_DIRTY, etc... The generic code will have
+ * handled those two for us, we additionally deal with missing execute
+ * permission here on some processors
+ */
+int pmdp_set_access_flags(struct vm_area_struct *vma, unsigned long address,
+			  pmd_t *pmdp, pmd_t entry, int dirty)
+{
+	int changed;
+#ifdef CONFIG_DEBUG_VM
+	WARN_ON(!pmd_trans_huge(*pmdp));
+	assert_spin_locked(&vma->vm_mm->page_table_lock);
+#endif
+	changed = !pmd_same(*(pmdp), entry);
+	if (changed) {
+		__ptep_set_access_flags(pmdp_ptep(pmdp), pmd_pte(entry));
+		/*
+		 * Since we are not supporting SW TLB systems, we don't
+		 * have anything similar to flush_tlb_page_nohash()
+		 */
+	}
+	return changed;
+}
+
+unsigned long pmd_hugepage_update(struct mm_struct *mm, unsigned long addr,
+				  pmd_t *pmdp, unsigned long clr)
+{
+
+	unsigned long old, tmp;
+
+#ifdef CONFIG_DEBUG_VM
+	WARN_ON(!pmd_trans_huge(*pmdp));
+	assert_spin_locked(&mm->page_table_lock);
+#endif
+
+#ifdef PTE_ATOMIC_UPDATES
+	__asm__ __volatile__(
+	"1:	ldarx	%0,0,%3\n\
+		andi.	%1,%0,%6\n\
+		bne-	1b \n\
+		andc	%1,%0,%4 \n\
+		stdcx.	%1,0,%3 \n\
+		bne-	1b"
+	: "=&r" (old), "=&r" (tmp), "=m" (*pmdp)
+	: "r" (pmdp), "r" (clr), "m" (*pmdp), "i" (_PAGE_BUSY)
+	: "cc" );
+#else
+	old = pmd_val(*pmdp);
+	*pmdp = __pmd(old & ~clr);
+#endif
+	if (old & _PAGE_HASHPTE)
+		hpte_do_hugepage_flush(mm, addr, pmdp);
+	return old;
+}
+
+pmd_t pmdp_clear_flush(struct vm_area_struct *vma, unsigned long address,
+		       pmd_t *pmdp)
+{
+	pmd_t pmd;
+
+	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
+	if (pmd_trans_huge(*pmdp)) {
+		pmd = pmdp_get_and_clear(vma->vm_mm, address, pmdp);
+	} else {
+		/*
+		 * khugepaged calls this for normal pmd
+		 */
+		pmd = *pmdp;
+		pmd_clear(pmdp);
+		/*
+		 * Wait for all pending hash_page to finish. This is needed
+		 * in case of subpage collapse. When we collapse normal pages
+		 * to hugepage, we first clear the pmd, then invalidate all
+		 * the PTE entries. The assumption here is that any low level
+		 * page fault will see a none pmd and take the slow path that
+		 * will wait on mmap_sem. But we could very well be in a
+		 * hash_page with local ptep pointer value. Such a hash page
+		 * can result in adding new HPTE entries for normal subpages.
+		 * That means we could be modifying the page content as we
+		 * copy them to a huge page. So wait for parallel hash_page
+		 * to finish before invalidating HPTE entries. We can do this
+		 * by sending an IPI to all the cpus and executing a dummy
+		 * function there.
+		 */
+		kick_all_cpus_sync();
+		/*
+		 * Now invalidate the hpte entries in the range
+		 * covered by pmd. This makes sure we take a
+		 * fault and will find the pmd as none, which will
+		 * result in a major fault which takes mmap_sem and
+		 * hence wait for collapse to complete. Without this
+		 * the __collapse_huge_page_copy can result in copying
+		 * the old content.
+		 */
+		flush_tlb_pmd_range(vma->vm_mm, &pmd, address);
+	}
+	return pmd;
+}
+
+int pmdp_test_and_clear_young(struct vm_area_struct *vma,
+			      unsigned long address, pmd_t *pmdp)
+{
+	return __pmdp_test_and_clear_young(vma->vm_mm, address, pmdp);
+}
+
+/*
+ * We currently remove entries from the hashtable regardless of whether
+ * the entry was young or dirty. The generic routines only flush if the
+ * entry was young or dirty which is not good enough.
+ *
+ * We should be more intelligent about this but for the moment we override
+ * these functions and force a tlb flush unconditionally
+ */
+int pmdp_clear_flush_young(struct vm_area_struct *vma,
+			   unsigned long address, pmd_t *pmdp)
+{
+	return __pmdp_test_and_clear_young(vma->vm_mm, address, pmdp);
+}
+
+/*
+ * We mark the pmd splitting and invalidate all the hpte
+ * entries for this hugepage.
+ */
+void pmdp_splitting_flush(struct vm_area_struct *vma,
+			  unsigned long address, pmd_t *pmdp)
+{
+	unsigned long old, tmp;
+
+	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
+
+#ifdef CONFIG_DEBUG_VM
+	WARN_ON(!pmd_trans_huge(*pmdp));
+	assert_spin_locked(&vma->vm_mm->page_table_lock);
+#endif
+
+#ifdef PTE_ATOMIC_UPDATES
+
+	__asm__ __volatile__(
+	"1:	ldarx	%0,0,%3\n\
+		andi.	%1,%0,%6\n\
+		bne-	1b \n\
+		ori	%1,%0,%4 \n\
+		stdcx.	%1,0,%3 \n\
+		bne-	1b"
+	: "=&r" (old), "=&r" (tmp), "=m" (*pmdp)
+	: "r" (pmdp), "i" (_PAGE_SPLITTING), "m" (*pmdp), "i" (_PAGE_BUSY)
+	: "cc" );
+#else
+	old = pmd_val(*pmdp);
+	*pmdp = __pmd(old | _PAGE_SPLITTING);
+#endif
+	/*
+	 * If we didn't have the splitting flag set, go and flush the
+	 * HPTE entries.
+	 */
+	if (!(old & _PAGE_SPLITTING)) {
+		/* We need to flush the hpte */
+		if (old & _PAGE_HASHPTE)
+			hpte_do_hugepage_flush(vma->vm_mm, address, pmdp);
+	}
+}
+
+/*
+ * We want to put the pgtable in pmd and use pgtable for tracking
+ * the base page size hptes
+ */
+void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
+				pgtable_t pgtable)
+{
+	pgtable_t *pgtable_slot;
+	assert_spin_locked(&mm->page_table_lock);
+	/*
+	 * we store the pgtable in the second half of PMD
+	 */
+	pgtable_slot = (pgtable_t *)pmdp + PTRS_PER_PMD;
+	*pgtable_slot = pgtable;
+	/*
+	 * expose the deposited pgtable to other cpus
+	 * before we set the hugepage PTE at pmd level;
+	 * hash fault code looks at the deposited pgtable
+	 * to store hash index values.
+	 */
+	smp_wmb();
+}
+
+pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
+{
+	pgtable_t pgtable;
+	pgtable_t *pgtable_slot;
+
+	assert_spin_locked(&mm->page_table_lock);
+	pgtable_slot = (pgtable_t *)pmdp + PTRS_PER_PMD;
+	pgtable = *pgtable_slot;
+	/*
+	 * Once we withdraw, mark the entry NULL.
+	 */
+	*pgtable_slot = NULL;
+	/*
+	 * We store HPTE information in the deposited PTE fragment.
+	 * Zero out the content on withdraw.
+	 */
+	memset(pgtable, 0, PTE_FRAG_SIZE);
+	return pgtable;
+}
+
+/*
+ * set a new huge pmd. We should not be called for updating
+ * an existing pmd entry. That should go via pmd_hugepage_update.
+ */
+void set_pmd_at(struct mm_struct *mm, unsigned long addr,
+		pmd_t *pmdp, pmd_t pmd)
+{
+#ifdef CONFIG_DEBUG_VM
+	WARN_ON(!pmd_none(*pmdp));
+	assert_spin_locked(&mm->page_table_lock);
+	WARN_ON(!pmd_trans_huge(pmd));
+#endif
+	return set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd));
+}
+
+void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
+		     pmd_t *pmdp)
+{
+	pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_PRESENT);
+}
+
+/*
+ * A linux hugepage PMD was changed and the corresponding hash table entries
+ * need to be flushed.
+ */
+void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr,
+			    pmd_t *pmdp)
+{
+	int ssize, i;
+	unsigned long s_addr;
+	unsigned int psize, valid;
+	unsigned char *hpte_slot_array;
+	unsigned long hidx, vpn, vsid, hash, shift, slot;
+
+	/*
+	 * Flush all the hptes mapping this hugepage
+	 */
+	s_addr = addr & HPAGE_PMD_MASK;
+	hpte_slot_array = get_hpte_slot_array(pmdp);
+	/*
+	 * If we try to do a HUGE PTE update after a withdraw is done,
+	 * we will find the below NULL. This happens when we do
+	 * split_huge_page_pmd
+	 */
+	if (!hpte_slot_array)
+		return;
+
+	/* get the base page size */
+	psize = get_slice_psize(mm, s_addr);
+	shift = mmu_psize_defs[psize].shift;
+
+	for (i = 0; i < (HPAGE_PMD_SIZE >> shift); i++) {
+		/*
+		 * 8 bits per hpte entry:
+		 * 000| [ secondary group (one bit) | hidx (3 bits) | valid bit]
+		 */
+		valid = hpte_valid(hpte_slot_array, i);
+		if (!valid)
+			continue;
+		hidx =  hpte_hash_index(hpte_slot_array, i);
+
+		/* get the vpn */
+		addr = s_addr + (i * (1ul << shift));
+		if (!is_kernel_addr(addr)) {
+			ssize = user_segment_size(addr);
+			vsid = get_vsid(mm->context.id, addr, ssize);
+			WARN_ON(vsid == 0);
+		} else {
+			vsid = get_kernel_vsid(addr, mmu_kernel_ssize);
+			ssize = mmu_kernel_ssize;
+		}
+
+		vpn = hpt_vpn(addr, vsid, ssize);
+		hash = hpt_hash(vpn, shift, ssize);
+		if (hidx & _PTEIDX_SECONDARY)
+			hash = ~hash;
+
+		slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
+		slot += hidx & _PTEIDX_GROUP_IX;
+		ppc_md.hpte_invalidate(slot, vpn, psize,
+				       MMU_PAGE_16M, ssize, 0);
+	}
+}
+
+static pmd_t pmd_set_protbits(pmd_t pmd, pgprot_t pgprot)
+{
+	pmd_val(pmd) |= pgprot_val(pgprot);
+	return pmd;
+}
+
+pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot)
+{
+	pmd_t pmd;
+	/*
+	 * For a valid pte, we would have _PAGE_PRESENT or _PAGE_FILE always
+	 * set. We use this to check THP page at pmd level.
+	 * leaf pte for huge page, bottom two bits != 00
+	 */
+	pmd_val(pmd) = pfn << PTE_RPN_SHIFT;
+	pmd_val(pmd) |= _PAGE_THP_HUGE;
+	pmd = pmd_set_protbits(pmd, pgprot);
+	return pmd;
+}
+
+pmd_t mk_pmd(struct page *page, pgprot_t pgprot)
+{
+	return pfn_pmd(page_to_pfn(page), pgprot);
+}
+
+pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
+{
+
+	pmd_val(pmd) &= _HPAGE_CHG_MASK;
+	pmd = pmd_set_protbits(pmd, newprot);
+	return pmd;
+}
+
+/*
+ * This is called at the end of handling a user page fault, when the
+ * fault has been handled by updating a HUGE PMD entry in the linux page tables.
+ * We use it to preload an HPTE into the hash table corresponding to
+ * the updated linux HUGE PMD entry.
+ */
+void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
+			  pmd_t *pmd)
+{
+	return;
+}
+
+pmd_t pmdp_get_and_clear(struct mm_struct *mm,
+			 unsigned long addr, pmd_t *pmdp)
+{
+	pmd_t old_pmd;
+	pgtable_t pgtable;
+	unsigned long old;
+	pgtable_t *pgtable_slot;
+
+	old = pmd_hugepage_update(mm, addr, pmdp, ~0UL);
+	old_pmd = __pmd(old);
+	/*
+	 * We have pmd == none and we are holding page_table_lock.
+	 * So we can safely go and clear the pgtable hash
+	 * index info.
+	 */
+	pgtable_slot = (pgtable_t *)pmdp + PTRS_PER_PMD;
+	pgtable = *pgtable_slot;
+	/*
+	 * Let's zero out old valid and hash index details;
+	 * the hash fault code looks at them.
+	 */
+	memset(pgtable, 0, PTE_FRAG_SIZE);
+	return old_pmd;
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
diff --git a/arch/powerpc/mm/tlb_hash64.c b/arch/powerpc/mm/tlb_hash64.c
index 023ec8a13f38..48bf63ea6525 100644
--- a/arch/powerpc/mm/tlb_hash64.c
+++ b/arch/powerpc/mm/tlb_hash64.c
@@ -219,3 +219,30 @@ void __flush_hash_table_range(struct mm_struct *mm, unsigned long start,
 	arch_leave_lazy_mmu_mode();
 	local_irq_restore(flags);
 }
+
+void flush_tlb_pmd_range(struct mm_struct *mm, pmd_t *pmd, unsigned long addr)
+{
+	pte_t *pte;
+	pte_t *start_pte;
+	unsigned long flags;
+
+	addr = _ALIGN_DOWN(addr, PMD_SIZE);
+	/* Note: Normally, we should only ever use a batch within a
+	 * PTE locked section. This violates the rule, but will work
+	 * since we don't actually modify the PTEs, we just flush the
+	 * hash while leaving the PTEs intact (including their reference
+	 * to being hashed). This is not the most performance oriented
+	 * way to do things but is fine for our needs here.
+	 */
+	local_irq_save(flags);
+	arch_enter_lazy_mmu_mode();
+	start_pte = pte_offset_map(pmd, addr);
+	for (pte = start_pte; pte < start_pte + PTRS_PER_PTE; pte++) {
+		unsigned long pteval = pte_val(*pte);
+		if (pteval & _PAGE_HASHPTE)
+			hpte_need_flush(mm, addr, pte, pteval, 0);
+		addr += PAGE_SIZE;
+	}
+	arch_leave_lazy_mmu_mode();
+	local_irq_restore(flags);
+}
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index 54f3936001aa..ae0aaea9e098 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -71,6 +71,7 @@ config PPC_BOOK3S_64
 	select PPC_FPU
 	select PPC_HAVE_PMU_SUPPORT
 	select SYS_SUPPORTS_HUGETLBFS
+	select HAVE_ARCH_TRANSPARENT_HUGEPAGE if PPC_64K_PAGES
 
 config PPC_BOOK3E_64
 	bool "Embedded processors"