Diffstat (limited to 'mm/hugetlb.c')
-rw-r--r--	mm/hugetlb.c	286
1 file changed, 233 insertions(+), 53 deletions(-)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 508707704d2c..ebad6bbb3501 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -13,24 +13,48 @@
 #include <linux/pagemap.h>
 #include <linux/mempolicy.h>
 #include <linux/cpuset.h>
+#include <linux/mutex.h>

 #include <asm/page.h>
 #include <asm/pgtable.h>

 #include <linux/hugetlb.h>
+#include "internal.h"

 const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL;
-static unsigned long nr_huge_pages, free_huge_pages;
+static unsigned long nr_huge_pages, free_huge_pages, reserved_huge_pages;
 unsigned long max_huge_pages;
 static struct list_head hugepage_freelists[MAX_NUMNODES];
 static unsigned int nr_huge_pages_node[MAX_NUMNODES];
 static unsigned int free_huge_pages_node[MAX_NUMNODES];
-
 /*
  * Protects updates to hugepage_freelists, nr_huge_pages, and free_huge_pages
  */
 static DEFINE_SPINLOCK(hugetlb_lock);

+static void clear_huge_page(struct page *page, unsigned long addr)
+{
+	int i;
+
+	might_sleep();
+	for (i = 0; i < (HPAGE_SIZE/PAGE_SIZE); i++) {
+		cond_resched();
+		clear_user_highpage(page + i, addr);
+	}
+}
+
+static void copy_huge_page(struct page *dst, struct page *src,
+			   unsigned long addr)
+{
+	int i;
+
+	might_sleep();
+	for (i = 0; i < HPAGE_SIZE/PAGE_SIZE; i++) {
+		cond_resched();
+		copy_user_highpage(dst + i, src + i, addr + i*PAGE_SIZE);
+	}
+}
+
 static void enqueue_huge_page(struct page *page)
 {
 	int nid = page_to_nid(page);
@@ -64,57 +88,176 @@ static struct page *dequeue_huge_page(struct vm_area_struct *vma,
 	return page;
 }

-static struct page *alloc_fresh_huge_page(void)
+static void free_huge_page(struct page *page)
+{
+	BUG_ON(page_count(page));
+
+	INIT_LIST_HEAD(&page->lru);
+
+	spin_lock(&hugetlb_lock);
+	enqueue_huge_page(page);
+	spin_unlock(&hugetlb_lock);
+}
+
+static int alloc_fresh_huge_page(void)
 {
 	static int nid = 0;
 	struct page *page;
 	page = alloc_pages_node(nid, GFP_HIGHUSER|__GFP_COMP|__GFP_NOWARN,
 				HUGETLB_PAGE_ORDER);
-	nid = (nid + 1) % num_online_nodes();
+	nid = next_node(nid, node_online_map);
+	if (nid == MAX_NUMNODES)
+		nid = first_node(node_online_map);
 	if (page) {
+		page[1].lru.next = (void *)free_huge_page;	/* dtor */
 		spin_lock(&hugetlb_lock);
 		nr_huge_pages++;
 		nr_huge_pages_node[page_to_nid(page)]++;
 		spin_unlock(&hugetlb_lock);
+		put_page(page); /* free it into the hugepage allocator */
+		return 1;
 	}
-	return page;
+	return 0;
 }

-void free_huge_page(struct page *page)
+static struct page *alloc_huge_page(struct vm_area_struct *vma,
+				    unsigned long addr)
 {
-	BUG_ON(page_count(page));
+	struct inode *inode = vma->vm_file->f_dentry->d_inode;
+	struct page *page;
+	int use_reserve = 0;
+	unsigned long idx;

-	INIT_LIST_HEAD(&page->lru);
-	page[1].lru.next = NULL;	/* reset dtor */
+	spin_lock(&hugetlb_lock);
+
+	if (vma->vm_flags & VM_MAYSHARE) {
+
+		/* idx = radix tree index, i.e. offset into file in
+		 * HPAGE_SIZE units */
+		idx = ((addr - vma->vm_start) >> HPAGE_SHIFT)
+			+ (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT));
+
+		/* The hugetlbfs specific inode info stores the number
+		 * of "guaranteed available" (huge) pages.  That is,
+		 * the first 'prereserved_hpages' pages of the inode
+		 * are either already instantiated, or have been
+		 * pre-reserved (by hugetlb_reserve_for_inode()). Here
+		 * we're in the process of instantiating the page, so
+		 * we use this to determine whether to draw from the
+		 * pre-reserved pool or the truly free pool. */
+		if (idx < HUGETLBFS_I(inode)->prereserved_hpages)
+			use_reserve = 1;
+	}
+
+	if (!use_reserve) {
+		if (free_huge_pages <= reserved_huge_pages)
+			goto fail;
+	} else {
+		BUG_ON(reserved_huge_pages == 0);
+		reserved_huge_pages--;
+	}
+
+	page = dequeue_huge_page(vma, addr);
+	if (!page)
+		goto fail;
+
+	spin_unlock(&hugetlb_lock);
+	set_page_refcounted(page);
+	return page;
+
+ fail:
+	WARN_ON(use_reserve);	/* reserved allocations shouldn't fail */
+	spin_unlock(&hugetlb_lock);
+	return NULL;
+}
+
+/* hugetlb_extend_reservation()
+ *
+ * Ensure that at least 'atleast' hugepages are, and will remain,
+ * available to instantiate the first 'atleast' pages of the given
+ * inode.  If the inode doesn't already have this many pages reserved
+ * or instantiated, set aside some hugepages in the reserved pool to
+ * satisfy later faults (or fail now if there aren't enough, rather
+ * than getting the SIGBUS later).
+ */
+int hugetlb_extend_reservation(struct hugetlbfs_inode_info *info,
+			       unsigned long atleast)
+{
+	struct inode *inode = &info->vfs_inode;
+	unsigned long change_in_reserve = 0;
+	int ret = 0;

 	spin_lock(&hugetlb_lock);
-	enqueue_huge_page(page);
+	read_lock_irq(&inode->i_mapping->tree_lock);
+
+	if (info->prereserved_hpages >= atleast)
+		goto out;
+
+	/* Because we always call this on shared mappings, none of the
+	 * pages beyond info->prereserved_hpages can have been
+	 * instantiated, so we need to reserve all of them now. */
+	change_in_reserve = atleast - info->prereserved_hpages;
+
+	if ((reserved_huge_pages + change_in_reserve) > free_huge_pages) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	reserved_huge_pages += change_in_reserve;
+	info->prereserved_hpages = atleast;
+
+ out:
+	read_unlock_irq(&inode->i_mapping->tree_lock);
 	spin_unlock(&hugetlb_lock);
+
+	return ret;
 }

-struct page *alloc_huge_page(struct vm_area_struct *vma, unsigned long addr)
+/* hugetlb_truncate_reservation()
+ *
+ * This returns pages reserved for the given inode to the general free
+ * hugepage pool.  If the inode has any pages prereserved, but not
+ * instantiated, beyond offset (atmost << HPAGE_SIZE), then release
+ * them.
+ */
+void hugetlb_truncate_reservation(struct hugetlbfs_inode_info *info,
+				  unsigned long atmost)
 {
+	struct inode *inode = &info->vfs_inode;
+	struct address_space *mapping = inode->i_mapping;
+	unsigned long idx;
+	unsigned long change_in_reserve = 0;
 	struct page *page;
-	int i;

 	spin_lock(&hugetlb_lock);
-	page = dequeue_huge_page(vma, addr);
-	if (!page) {
-		spin_unlock(&hugetlb_lock);
-		return NULL;
+	read_lock_irq(&inode->i_mapping->tree_lock);
+
+	if (info->prereserved_hpages <= atmost)
+		goto out;
+
+	/* Count pages which were reserved, but not instantiated, and
+	 * which we can now release. */
+	for (idx = atmost; idx < info->prereserved_hpages; idx++) {
+		page = radix_tree_lookup(&mapping->page_tree, idx);
+		if (!page)
+			/* Pages which are already instantiated can't
+			 * be unreserved (and in fact have already
+			 * been removed from the reserved pool) */
+			change_in_reserve++;
 	}
+
+	BUG_ON(reserved_huge_pages < change_in_reserve);
+	reserved_huge_pages -= change_in_reserve;
+	info->prereserved_hpages = atmost;
+
+ out:
+	read_unlock_irq(&inode->i_mapping->tree_lock);
 	spin_unlock(&hugetlb_lock);
-	set_page_count(page, 1);
-	page[1].lru.next = (void *)free_huge_page;	/* set dtor */
-	for (i = 0; i < (HPAGE_SIZE/PAGE_SIZE); ++i)
-		clear_user_highpage(&page[i], addr);
-	return page;
 }

 static int __init hugetlb_init(void)
 {
 	unsigned long i;
-	struct page *page;

 	if (HPAGE_SHIFT == 0)
 		return 0;
@@ -123,12 +266,8 @@ static int __init hugetlb_init(void)
 		INIT_LIST_HEAD(&hugepage_freelists[i]);

 	for (i = 0; i < max_huge_pages; ++i) {
-		page = alloc_fresh_huge_page();
-		if (!page)
+		if (!alloc_fresh_huge_page())
 			break;
-		spin_lock(&hugetlb_lock);
-		enqueue_huge_page(page);
-		spin_unlock(&hugetlb_lock);
 	}
 	max_huge_pages = free_huge_pages = nr_huge_pages = i;
 	printk("Total HugeTLB memory allocated, %ld\n", free_huge_pages);
@@ -154,9 +293,9 @@ static void update_and_free_page(struct page *page)
 		page[i].flags &= ~(1 << PG_locked | 1 << PG_error | 1 << PG_referenced |
 				1 << PG_dirty | 1 << PG_active | 1 << PG_reserved |
 				1 << PG_private | 1<< PG_writeback);
-		set_page_count(&page[i], 0);
 	}
-	set_page_count(page, 1);
+	page[1].lru.next = NULL;
+	set_page_refcounted(page);
 	__free_pages(page, HUGETLB_PAGE_ORDER);
 }

@@ -188,12 +327,8 @@ static inline void try_to_free_low(unsigned long count)
 static unsigned long set_max_huge_pages(unsigned long count)
 {
 	while (count > nr_huge_pages) {
-		struct page *page = alloc_fresh_huge_page();
-		if (!page)
+		if (!alloc_fresh_huge_page())
 			return nr_huge_pages;
-		spin_lock(&hugetlb_lock);
-		enqueue_huge_page(page);
-		spin_unlock(&hugetlb_lock);
 	}
 	if (count >= nr_huge_pages)
 		return nr_huge_pages;
@@ -225,9 +360,11 @@ int hugetlb_report_meminfo(char *buf)
 	return sprintf(buf,
 			"HugePages_Total: %5lu\n"
 			"HugePages_Free:  %5lu\n"
+			"HugePages_Rsvd:  %5lu\n"
 			"Hugepagesize:    %5lu kB\n",
 			nr_huge_pages,
 			free_huge_pages,
+			reserved_huge_pages,
 			HPAGE_SIZE/1024);
 }

@@ -240,11 +377,6 @@ int hugetlb_report_node_meminfo(int nid, char *buf)
 		nid, free_huge_pages_node[nid]);
 }

-int is_hugepage_mem_enough(size_t size)
-{
-	return (size + ~HPAGE_MASK)/HPAGE_SIZE <= free_huge_pages;
-}
-
 /* Return the number pages of memory we physically have, in PAGE_SIZE units. */
 unsigned long hugetlb_total_pages(void)
 {
@@ -374,7 +506,7 @@ static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
 			unsigned long address, pte_t *ptep, pte_t pte)
 {
 	struct page *old_page, *new_page;
-	int i, avoidcopy;
+	int avoidcopy;

 	old_page = pte_page(pte);

@@ -395,9 +527,7 @@ static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
 	}

 	spin_unlock(&mm->page_table_lock);
-	for (i = 0; i < HPAGE_SIZE/PAGE_SIZE; i++)
-		copy_user_highpage(new_page + i, old_page + i,
-				   address + i*PAGE_SIZE);
+	copy_huge_page(new_page, old_page, address);
 	spin_lock(&mm->page_table_lock);

 	ptep = huge_pte_offset(mm, address & HPAGE_MASK);
@@ -442,6 +572,7 @@ retry:
 		ret = VM_FAULT_OOM;
 		goto out;
 	}
+	clear_huge_page(page, address);

 	if (vma->vm_flags & VM_SHARED) {
 		int err;
@@ -496,14 +627,24 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	pte_t *ptep;
 	pte_t entry;
 	int ret;
+	static DEFINE_MUTEX(hugetlb_instantiation_mutex);

 	ptep = huge_pte_alloc(mm, address);
 	if (!ptep)
 		return VM_FAULT_OOM;

+	/*
+	 * Serialize hugepage allocation and instantiation, so that we don't
+	 * get spurious allocation failures if two CPUs race to instantiate
+	 * the same page in the page cache.
+	 */
+	mutex_lock(&hugetlb_instantiation_mutex);
 	entry = *ptep;
-	if (pte_none(entry))
-		return hugetlb_no_page(mm, vma, address, ptep, write_access);
+	if (pte_none(entry)) {
+		ret = hugetlb_no_page(mm, vma, address, ptep, write_access);
+		mutex_unlock(&hugetlb_instantiation_mutex);
+		return ret;
+	}

 	ret = VM_FAULT_MINOR;

@@ -513,6 +654,7 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	if (write_access && !pte_write(entry))
 		ret = hugetlb_cow(mm, vma, address, ptep, entry);
 	spin_unlock(&mm->page_table_lock);
+	mutex_unlock(&hugetlb_instantiation_mutex);

 	return ret;
 }
@@ -521,10 +663,10 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 			struct page **pages, struct vm_area_struct **vmas,
 			unsigned long *position, int *length, int i)
 {
-	unsigned long vpfn, vaddr = *position;
+	unsigned long pfn_offset;
+	unsigned long vaddr = *position;
 	int remainder = *length;

-	vpfn = vaddr/PAGE_SIZE;
 	spin_lock(&mm->page_table_lock);
 	while (vaddr < vma->vm_end && remainder) {
 		pte_t *pte;
@@ -552,19 +694,28 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 			break;
 		}

-		if (pages) {
-			page = &pte_page(*pte)[vpfn % (HPAGE_SIZE/PAGE_SIZE)];
-			get_page(page);
-			pages[i] = page;
-		}
+		pfn_offset = (vaddr & ~HPAGE_MASK) >> PAGE_SHIFT;
+		page = pte_page(*pte);
+same_page:
+		get_page(page);
+		if (pages)
+			pages[i] = page + pfn_offset;

 		if (vmas)
 			vmas[i] = vma;

 		vaddr += PAGE_SIZE;
-		++vpfn;
+		++pfn_offset;
 		--remainder;
 		++i;
+		if (vaddr < vma->vm_end && remainder &&
+		    pfn_offset < HPAGE_SIZE/PAGE_SIZE) {
+			/*
+			 * We use pfn_offset to avoid touching the pageframes
+			 * of this compound page.
+			 */
+			goto same_page;
+		}
 	}
 	spin_unlock(&mm->page_table_lock);
 	*length = remainder;
@@ -572,3 +723,32 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,

 	return i;
 }
+
+void hugetlb_change_protection(struct vm_area_struct *vma,
+		unsigned long address, unsigned long end, pgprot_t newprot)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	unsigned long start = address;
+	pte_t *ptep;
+	pte_t pte;
+
+	BUG_ON(address >= end);
+	flush_cache_range(vma, address, end);
+
+	spin_lock(&mm->page_table_lock);
+	for (; address < end; address += HPAGE_SIZE) {
+		ptep = huge_pte_offset(mm, address);
+		if (!ptep)
+			continue;
+		if (!pte_none(*ptep)) {
+			pte = huge_ptep_get_and_clear(mm, address, ptep);
+			pte = pte_mkhuge(pte_modify(pte, newprot));
+			set_huge_pte_at(mm, address, ptep, pte);
+			lazy_mmu_prot_update(pte);
+		}
+	}
+	spin_unlock(&mm->page_table_lock);
+
+	flush_tlb_range(vma, start, end);
+}
+
