 arch/powerpc/mm/hugetlbpage.c | 295
 arch/powerpc/mm/init_64.c     |   7
 include/asm-powerpc/page_64.h |   1
 include/asm-powerpc/pgalloc.h |   2
 4 files changed, 269 insertions, 36 deletions
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 7370f9f33e29..266b8b2ceac9 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -30,13 +30,66 @@
 #define NUM_LOW_AREAS   (0x100000000UL >> SID_SHIFT)
 #define NUM_HIGH_AREAS  (PGTABLE_RANGE >> HTLB_AREA_SHIFT)
 
+#ifdef CONFIG_PPC_64K_PAGES
+#define HUGEPTE_INDEX_SIZE      (PMD_SHIFT-HPAGE_SHIFT)
+#else
+#define HUGEPTE_INDEX_SIZE      (PUD_SHIFT-HPAGE_SHIFT)
+#endif
+#define PTRS_PER_HUGEPTE        (1 << HUGEPTE_INDEX_SIZE)
+#define HUGEPTE_TABLE_SIZE      (sizeof(pte_t) << HUGEPTE_INDEX_SIZE)
+
+#define HUGEPD_SHIFT            (HPAGE_SHIFT + HUGEPTE_INDEX_SIZE)
+#define HUGEPD_SIZE             (1UL << HUGEPD_SHIFT)
+#define HUGEPD_MASK             (~(HUGEPD_SIZE-1))
+
+#define huge_pgtable_cache      (pgtable_cache[HUGEPTE_CACHE_NUM])
+
+/* Flag to mark huge PD pointers. This means pmd_bad() and pud_bad()
+ * will choke on pointers to hugepte tables, which is handy for
+ * catching screwups early. */
+#define HUGEPD_OK       0x1
+
+typedef struct { unsigned long pd; } hugepd_t;
+
+#define hugepd_none(hpd)        ((hpd).pd == 0)
+
+static inline pte_t *hugepd_page(hugepd_t hpd)
+{
+        BUG_ON(!(hpd.pd & HUGEPD_OK));
+        return (pte_t *)(hpd.pd & ~HUGEPD_OK);
+}
+
+static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr)
+{
+        unsigned long idx = ((addr >> HPAGE_SHIFT) & (PTRS_PER_HUGEPTE-1));
+        pte_t *dir = hugepd_page(*hpdp);
+
+        return dir + idx;
+}
+
+static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
+                           unsigned long address)
+{
+        pte_t *new = kmem_cache_alloc(huge_pgtable_cache,
+                                      GFP_KERNEL|__GFP_REPEAT);
+
+        if (! new)
+                return -ENOMEM;
+
+        spin_lock(&mm->page_table_lock);
+        if (!hugepd_none(*hpdp))
+                kmem_cache_free(huge_pgtable_cache, new);
+        else
+                hpdp->pd = (unsigned long)new | HUGEPD_OK;
+        spin_unlock(&mm->page_table_lock);
+        return 0;
+}
+
 /* Modelled after find_linux_pte() */
 pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
 {
         pgd_t *pg;
         pud_t *pu;
-        pmd_t *pm;
-        pte_t *pt;
 
         BUG_ON(! in_hugepage_area(mm->context, addr));
 
@@ -46,26 +99,14 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
         if (!pgd_none(*pg)) {
                 pu = pud_offset(pg, addr);
                 if (!pud_none(*pu)) {
-                        pm = pmd_offset(pu, addr);
 #ifdef CONFIG_PPC_64K_PAGES
-                        /* Currently, we use the normal PTE offset within full
-                         * size PTE pages, thus our huge PTEs are scattered in
-                         * the PTE page and we do waste some. We may change
-                         * that in the future, but the current mecanism keeps
-                         * things much simpler
-                         */
-                        if (!pmd_none(*pm)) {
-                                /* Note: pte_offset_* are all equivalent on
-                                 * ppc64 as we don't have HIGHMEM
-                                 */
-                                pt = pte_offset_kernel(pm, addr);
-                                return pt;
-                        }
-#else /* CONFIG_PPC_64K_PAGES */
-                        /* On 4k pages, we put huge PTEs in the PMD page */
-                        pt = (pte_t *)pm;
-                        return pt;
-#endif /* CONFIG_PPC_64K_PAGES */
+                        pmd_t *pm;
+                        pm = pmd_offset(pu, addr);
+                        if (!pmd_none(*pm))
+                                return hugepte_offset((hugepd_t *)pm, addr);
+#else
+                        return hugepte_offset((hugepd_t *)pu, addr);
+#endif
                 }
         }
 
@@ -76,8 +117,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
 {
         pgd_t *pg;
         pud_t *pu;
-        pmd_t *pm;
-        pte_t *pt;
+        hugepd_t *hpdp = NULL;
 
         BUG_ON(! in_hugepage_area(mm->context, addr));
 
@@ -87,23 +127,182 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
         pu = pud_alloc(mm, pg, addr);
 
         if (pu) {
+#ifdef CONFIG_PPC_64K_PAGES
+                pmd_t *pm;
                 pm = pmd_alloc(mm, pu, addr);
-                if (pm) {
+                if (pm)
+                        hpdp = (hugepd_t *)pm;
+#else
+                hpdp = (hugepd_t *)pu;
+#endif
+        }
+
+        if (! hpdp)
+                return NULL;
+
+        if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr))
+                return NULL;
+
+        return hugepte_offset(hpdp, addr);
+}
+
+static void free_hugepte_range(struct mmu_gather *tlb, hugepd_t *hpdp)
+{
+        pte_t *hugepte = hugepd_page(*hpdp);
+
+        hpdp->pd = 0;
+        tlb->need_flush = 1;
+        pgtable_free_tlb(tlb, pgtable_free_cache(hugepte, HUGEPTE_CACHE_NUM,
+                                                 HUGEPTE_TABLE_SIZE-1));
+}
+
 #ifdef CONFIG_PPC_64K_PAGES
-                        /* See comment in huge_pte_offset. Note that if we ever
-                         * want to put the page size in the PMD, we would have
-                         * to open code our own pte_alloc* function in order
-                         * to populate and set the size atomically
-                         */
-                        pt = pte_alloc_map(mm, pm, addr);
-#else /* CONFIG_PPC_64K_PAGES */
-                        pt = (pte_t *)pm;
-#endif /* CONFIG_PPC_64K_PAGES */
-                        return pt;
-                }
+static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
+                                   unsigned long addr, unsigned long end,
+                                   unsigned long floor, unsigned long ceiling)
+{
+        pmd_t *pmd;
+        unsigned long next;
+        unsigned long start;
+
+        start = addr;
+        pmd = pmd_offset(pud, addr);
+        do {
+                next = pmd_addr_end(addr, end);
+                if (pmd_none(*pmd))
+                        continue;
+                free_hugepte_range(tlb, (hugepd_t *)pmd);
+        } while (pmd++, addr = next, addr != end);
+
+        start &= PUD_MASK;
+        if (start < floor)
+                return;
+        if (ceiling) {
+                ceiling &= PUD_MASK;
+                if (!ceiling)
+                        return;
         }
+        if (end - 1 > ceiling - 1)
+                return;
 
-        return NULL;
+        pmd = pmd_offset(pud, start);
+        pud_clear(pud);
+        pmd_free_tlb(tlb, pmd);
+}
+#endif
+
+static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
+                                   unsigned long addr, unsigned long end,
+                                   unsigned long floor, unsigned long ceiling)
+{
+        pud_t *pud;
+        unsigned long next;
+        unsigned long start;
+
+        start = addr;
+        pud = pud_offset(pgd, addr);
+        do {
+                next = pud_addr_end(addr, end);
+#ifdef CONFIG_PPC_64K_PAGES
+                if (pud_none_or_clear_bad(pud))
+                        continue;
+                hugetlb_free_pmd_range(tlb, pud, addr, next, floor, ceiling);
+#else
+                if (pud_none(*pud))
+                        continue;
+                free_hugepte_range(tlb, (hugepd_t *)pud);
+#endif
+        } while (pud++, addr = next, addr != end);
+
+        start &= PGDIR_MASK;
+        if (start < floor)
+                return;
+        if (ceiling) {
+                ceiling &= PGDIR_MASK;
+                if (!ceiling)
+                        return;
+        }
+        if (end - 1 > ceiling - 1)
+                return;
+
+        pud = pud_offset(pgd, start);
+        pgd_clear(pgd);
+        pud_free_tlb(tlb, pud);
+}
+
+/*
+ * This function frees user-level page tables of a process.
+ *
+ * Must be called with pagetable lock held.
+ */
+void hugetlb_free_pgd_range(struct mmu_gather **tlb,
+                            unsigned long addr, unsigned long end,
+                            unsigned long floor, unsigned long ceiling)
+{
+        pgd_t *pgd;
+        unsigned long next;
+        unsigned long start;
+
+        /*
+         * Comments below take from the normal free_pgd_range().  They
+         * apply here too.  The tests against HUGEPD_MASK below are
+         * essential, because we *don't* test for this at the bottom
+         * level.  Without them we'll attempt to free a hugepte table
+         * when we unmap just part of it, even if there are other
+         * active mappings using it.
+         *
+         * The next few lines have given us lots of grief...
+         *
+         * Why are we testing HUGEPD* at this top level?  Because
+         * often there will be no work to do at all, and we'd prefer
+         * not to go all the way down to the bottom just to discover
+         * that.
+         *
+         * Why all these "- 1"s?  Because 0 represents both the bottom
+         * of the address space and the top of it (using -1 for the
+         * top wouldn't help much: the masks would do the wrong thing).
+         * The rule is that addr 0 and floor 0 refer to the bottom of
+         * the address space, but end 0 and ceiling 0 refer to the top
+         * Comparisons need to use "end - 1" and "ceiling - 1" (though
+         * that end 0 case should be mythical).
+         *
+         * Wherever addr is brought up or ceiling brought down, we
+         * must be careful to reject "the opposite 0" before it
+         * confuses the subsequent tests.  But what about where end is
+         * brought down by HUGEPD_SIZE below? no, end can't go down to
+         * 0 there.
+         *
+         * Whereas we round start (addr) and ceiling down, by different
+         * masks at different levels, in order to test whether a table
+         * now has no other vmas using it, so can be freed, we don't
+         * bother to round floor or end up - the tests don't need that.
+         */
+
+        addr &= HUGEPD_MASK;
+        if (addr < floor) {
+                addr += HUGEPD_SIZE;
+                if (!addr)
+                        return;
+        }
+        if (ceiling) {
+                ceiling &= HUGEPD_MASK;
+                if (!ceiling)
+                        return;
+        }
+        if (end - 1 > ceiling - 1)
+                end -= HUGEPD_SIZE;
+        if (addr > end - 1)
+                return;
+
+        start = addr;
+        pgd = pgd_offset((*tlb)->mm, addr);
+        do {
+                BUG_ON(! in_hugepage_area((*tlb)->mm->context, addr));
+                next = pgd_addr_end(addr, end);
+                if (pgd_none_or_clear_bad(pgd))
+                        continue;
+                hugetlb_free_pud_range(*tlb, pgd, addr, next, floor, ceiling);
+        } while (pgd++, addr = next, addr != end);
 }
 
 void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
@@ -841,3 +1040,27 @@ repeat:
 out:
         return err;
 }
+
+static void zero_ctor(void *addr, kmem_cache_t *cache, unsigned long flags)
+{
+        memset(addr, 0, kmem_cache_size(cache));
+}
+
+static int __init hugetlbpage_init(void)
+{
+        if (!cpu_has_feature(CPU_FTR_16M_PAGE))
+                return -ENODEV;
+
+        huge_pgtable_cache = kmem_cache_create("hugepte_cache",
+                                               HUGEPTE_TABLE_SIZE,
+                                               HUGEPTE_TABLE_SIZE,
+                                               SLAB_HWCACHE_ALIGN |
+                                               SLAB_MUST_HWCACHE_ALIGN,
+                                               zero_ctor, NULL);
+        if (! huge_pgtable_cache)
+                panic("hugetlbpage_init(): could not create hugepte cache\n");
+
+        return 0;
+}
+
+module_init(hugetlbpage_init);
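
The core of the hugetlbpage.c changes is the hugepd_t encoding: a huge-page directory entry stores the address of a separately allocated hugepte table with its low bit (HUGEPD_OK) set as a type tag, so a stray walk through pmd_bad()/pud_bad() trips on it early. The stand-alone program below is not part of the patch; it is a minimal sketch of that tagging and lookup idea with simplified types (uint64_t standing in for pte_t), assuming only what the kernel also relies on, namely that a table's address has its low bit clear.

/*
 * Minimal userspace sketch of the hugepd_t encoding used in the patch:
 * a directory entry is the address of a hugepte table with the low bit
 * (HUGEPD_OK) set as a tag.  Illustration only -- the names mirror the
 * patch, but nothing here is kernel code.
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define HUGEPD_OK 0x1UL

typedef struct { unsigned long pd; } hugepd_t;

/* An empty directory entry is encoded as 0, as in hugepd_none(). */
static int hugepd_none(hugepd_t hpd) { return hpd.pd == 0; }

/* Recover the table pointer by stripping the tag, as in hugepd_page(). */
static uint64_t *hugepd_page(hugepd_t hpd)
{
        assert(hpd.pd & HUGEPD_OK);             /* catches untagged pointers */
        return (uint64_t *)(hpd.pd & ~HUGEPD_OK);
}

int main(void)
{
        hugepd_t hpd = { 0 };
        /* Any allocator result is at least word aligned, so bit 0 is free
         * to use as a tag -- the same property the kernel relies on. */
        uint64_t *table = calloc(16, sizeof(*table));

        assert(hugepd_none(hpd));
        hpd.pd = (unsigned long)table | HUGEPD_OK;      /* publish the table */
        assert(!hugepd_none(hpd));
        assert(hugepd_page(hpd) == table);

        printf("table at %p, encoded entry 0x%lx\n", (void *)table, hpd.pd);
        free(table);
        return 0;
}

Because the whole entry is a single word, __hugepte_alloc() in the patch can publish a freshly allocated table with one store under mm->page_table_lock and simply free its allocation if another thread got there first.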
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index babebd15bdc4..9e30f968c184 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -162,7 +162,14 @@ static const char *pgtable_cache_name[ARRAY_SIZE(pgtable_cache_size)] = {
 };
 #endif /* CONFIG_PPC_64K_PAGES */
 
+#ifdef CONFIG_HUGETLB_PAGE
+/* Hugepages need one extra cache, initialized in hugetlbpage.c.  We
+ * can't put into the tables above, because HPAGE_SHIFT is not compile
+ * time constant. */
+kmem_cache_t *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)+1];
+#else
 kmem_cache_t *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)];
+#endif
 
 void pgtable_cache_init(void)
 {
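
The init_64.c hunk reserves one extra pgtable_cache slot because the hugepte table size, unlike the other page-table cache sizes, depends on HPAGE_SHIFT, which powerpc only fixes at boot. The short program below just reproduces the HUGEPTE_INDEX_SIZE / HUGEPTE_TABLE_SIZE arithmetic from the top of hugetlbpage.c with invented example shifts; the numbers are illustrative assumptions, not the kernel's actual configuration constants.

/*
 * Why the hugepte cache size is a runtime value: it is derived from
 * HPAGE_SHIFT, which on powerpc is a variable set at boot.  The shift
 * values below are examples only.
 */
#include <stdio.h>

int main(void)
{
        unsigned int hpage_shift = 24;  /* e.g. 16MB huge pages, known at boot */
        unsigned int pud_shift   = 30;  /* hypothetical upper-level shift */
        unsigned int pte_size    = 8;   /* assumed sizeof(pte_t) */

        /* Mirrors HUGEPTE_INDEX_SIZE and HUGEPTE_TABLE_SIZE from the patch
         * (the 4K-base-page variant, which uses PUD_SHIFT). */
        unsigned int hugepte_index_size = pud_shift - hpage_shift;
        unsigned long hugepte_table_size =
                (unsigned long)pte_size << hugepte_index_size;

        printf("%u huge PTEs per table, %lu bytes per hugepte table\n",
               1U << hugepte_index_size, hugepte_table_size);
        return 0;
}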
diff --git a/include/asm-powerpc/page_64.h b/include/asm-powerpc/page_64.h
index 3fb061bab9ec..eab779c21995 100644
--- a/include/asm-powerpc/page_64.h
+++ b/include/asm-powerpc/page_64.h
@@ -101,6 +101,7 @@ extern unsigned int HPAGE_SHIFT;
                         - (1U << GET_HTLB_AREA(addr))) & 0xffff)
 
 #define ARCH_HAS_HUGEPAGE_ONLY_RANGE
+#define ARCH_HAS_HUGETLB_FREE_PGD_RANGE
 #define ARCH_HAS_PREPARE_HUGEPAGE_RANGE
 #define ARCH_HAS_SETCLEAR_HUGE_PTE
 
diff --git a/include/asm-powerpc/pgalloc.h b/include/asm-powerpc/pgalloc.h
index a00ee002cd11..9f0917c68659 100644
--- a/include/asm-powerpc/pgalloc.h
+++ b/include/asm-powerpc/pgalloc.h
@@ -17,11 +17,13 @@ extern kmem_cache_t *pgtable_cache[];
 #define PTE_CACHE_NUM   0
 #define PMD_CACHE_NUM   1
 #define PGD_CACHE_NUM   2
+#define HUGEPTE_CACHE_NUM 3
 #else
 #define PTE_CACHE_NUM   0
 #define PMD_CACHE_NUM   1
 #define PUD_CACHE_NUM   1
 #define PGD_CACHE_NUM   0
+#define HUGEPTE_CACHE_NUM 2
 #endif
 
 /*
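
Most of the subtlety in the new hugetlb_free_pgd_range() is the clamping of addr, end and ceiling before the page-table walk, described by the long comment in the hugetlbpage.c hunk above. The stand-alone sketch below reproduces only that arithmetic, with an invented HUGEPD_SHIFT, so the two interesting cases can be checked by hand; it is an illustration under those assumptions, not kernel code.

/*
 * Sketch of the floor/ceiling clamping done by hugetlb_free_pgd_range()
 * before walking the page tables.  The constants are made up for the
 * example (a 256MB span per hugepte table); only the arithmetic pattern
 * mirrors the patch.
 */
#include <stdio.h>

#define HUGEPD_SHIFT    28UL                    /* hypothetical span size */
#define HUGEPD_SIZE     (1UL << HUGEPD_SHIFT)
#define HUGEPD_MASK     (~(HUGEPD_SIZE - 1))

/* Returns 1 if the walk should proceed for [addr, end) given the vma
 * limits [floor, ceiling), after the same rounding as the kernel code:
 * addr rounded down and bumped up a whole span if that dips below floor,
 * ceiling rounded down, and "- 1" comparisons so that an end/ceiling of
 * 0 still means "top of the address space". */
static int range_needs_free(unsigned long addr, unsigned long end,
                            unsigned long floor, unsigned long ceiling)
{
        addr &= HUGEPD_MASK;
        if (addr < floor) {
                addr += HUGEPD_SIZE;
                if (!addr)
                        return 0;
        }
        if (ceiling) {
                ceiling &= HUGEPD_MASK;
                if (!ceiling)
                        return 0;
        }
        if (end - 1 > ceiling - 1)
                end -= HUGEPD_SIZE;
        return addr <= end - 1;
}

int main(void)
{
        /* Unmapping part of one span whose neighbours (floor/ceiling)
         * sit in the same span: the hugepte table is still in use, so
         * nothing may be freed -- prints 0. */
        printf("%d\n", range_needs_free(0x12000000, 0x13000000,
                                        0x11000000, 0x14000000));
        /* Unmapping a range that covers whole spans with no neighbours
         * inside them: the walk proceeds -- prints 1. */
        printf("%d\n", range_needs_free(0x10000000, 0x30000000,
                                        0x10000000, 0x30000000));
        return 0;
}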
