Diffstat (limited to 'fs/hugetlbfs/inode.c')
-rw-r--r--   fs/hugetlbfs/inode.c   206
1 file changed, 120 insertions, 86 deletions
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 3a9b6d179cbd..e026c807e6b3 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -45,10 +45,58 @@ static struct backing_dev_info hugetlbfs_backing_dev_info = {
 
 int sysctl_hugetlb_shm_group;
 
+static void huge_pagevec_release(struct pagevec *pvec)
+{
+	int i;
+
+	for (i = 0; i < pagevec_count(pvec); ++i)
+		put_page(pvec->pages[i]);
+
+	pagevec_reinit(pvec);
+}
+
+/*
+ * huge_pages_needed tries to determine the number of new huge pages that
+ * will be required to fully populate this VMA.  This will be equal to
+ * the size of the VMA in huge pages minus the number of huge pages
+ * (covered by this VMA) that are found in the page cache.
+ *
+ * Result is in bytes to be compatible with is_hugepage_mem_enough()
+ */
+unsigned long
+huge_pages_needed(struct address_space *mapping, struct vm_area_struct *vma)
+{
+	int i;
+	struct pagevec pvec;
+	unsigned long start = vma->vm_start;
+	unsigned long end = vma->vm_end;
+	unsigned long hugepages = (end - start) >> HPAGE_SHIFT;
+	pgoff_t next = vma->vm_pgoff;
+	pgoff_t endpg = next + ((end - start) >> PAGE_SHIFT);
+
+	pagevec_init(&pvec, 0);
+	while (next < endpg) {
+		if (!pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE))
+			break;
+		for (i = 0; i < pagevec_count(&pvec); i++) {
+			struct page *page = pvec.pages[i];
+			if (page->index > next)
+				next = page->index;
+			if (page->index >= endpg)
+				break;
+			next++;
+			hugepages--;
+		}
+		huge_pagevec_release(&pvec);
+	}
+	return hugepages << HPAGE_SHIFT;
+}
+
 static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	struct inode *inode = file->f_dentry->d_inode;
 	struct address_space *mapping = inode->i_mapping;
+	unsigned long bytes;
 	loff_t len, vma_len;
 	int ret;
 
@@ -67,6 +115,10 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
 	if (vma->vm_end - vma->vm_start < HPAGE_SIZE)
 		return -EINVAL;
 
+	bytes = huge_pages_needed(mapping, vma);
+	if (!is_hugepage_mem_enough(bytes))
+		return -ENOMEM;
+
 	vma_len = (loff_t)(vma->vm_end - vma->vm_start);
 
 	down(&inode->i_sem);
@@ -79,10 +131,8 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
 	if (!(vma->vm_flags & VM_WRITE) && len > inode->i_size)
 		goto out;
 
-	ret = hugetlb_prefault(mapping, vma);
-	if (ret)
-		goto out;
-
+	ret = 0;
+	hugetlb_prefault_arch_hook(vma->vm_mm);
 	if (inode->i_size < len)
 		inode->i_size = len;
 out:
@@ -92,7 +142,7 @@ out:
 }
 
 /*
- * Called under down_write(mmap_sem), page_table_lock is not held
+ * Called under down_write(mmap_sem).
  */
 
 #ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA
@@ -171,16 +221,6 @@ static int hugetlbfs_commit_write(struct file *file,
 	return -EINVAL;
 }
 
-static void huge_pagevec_release(struct pagevec *pvec)
-{
-	int i;
-
-	for (i = 0; i < pagevec_count(pvec); ++i)
-		put_page(pvec->pages[i]);
-
-	pagevec_reinit(pvec);
-}
-
 static void truncate_huge_page(struct page *page)
 {
 	clear_page_dirty(page);
@@ -224,52 +264,35 @@ static void truncate_hugepages(struct address_space *mapping, loff_t lstart)
 
 static void hugetlbfs_delete_inode(struct inode *inode)
 {
-	struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(inode->i_sb);
-
-	hlist_del_init(&inode->i_hash);
-	list_del_init(&inode->i_list);
-	list_del_init(&inode->i_sb_list);
-	inode->i_state |= I_FREEING;
-	inodes_stat.nr_inodes--;
-	spin_unlock(&inode_lock);
-
 	if (inode->i_data.nrpages)
 		truncate_hugepages(&inode->i_data, 0);
-
-	security_inode_delete(inode);
-
-	if (sbinfo->free_inodes >= 0) {
-		spin_lock(&sbinfo->stat_lock);
-		sbinfo->free_inodes++;
-		spin_unlock(&sbinfo->stat_lock);
-	}
-
 	clear_inode(inode);
-	destroy_inode(inode);
 }
 
 static void hugetlbfs_forget_inode(struct inode *inode)
 {
-	struct super_block *super_block = inode->i_sb;
-	struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(super_block);
+	struct super_block *sb = inode->i_sb;
 
-	if (hlist_unhashed(&inode->i_hash))
-		goto out_truncate;
-
-	if (!(inode->i_state & (I_DIRTY|I_LOCK))) {
-		list_del(&inode->i_list);
-		list_add(&inode->i_list, &inode_unused);
-	}
-	inodes_stat.nr_unused++;
-	if (!super_block || (super_block->s_flags & MS_ACTIVE)) {
-		spin_unlock(&inode_lock);
-		return;
+	if (!hlist_unhashed(&inode->i_hash)) {
+		if (!(inode->i_state & (I_DIRTY|I_LOCK)))
+			list_move(&inode->i_list, &inode_unused);
+		inodes_stat.nr_unused++;
+		if (!sb || (sb->s_flags & MS_ACTIVE)) {
+			spin_unlock(&inode_lock);
+			return;
+		}
+		inode->i_state |= I_WILL_FREE;
+		spin_unlock(&inode_lock);
+		/*
+		 * write_inode_now is a noop as we set BDI_CAP_NO_WRITEBACK
+		 * in our backing_dev_info.
+		 */
+		write_inode_now(inode, 1);
+		spin_lock(&inode_lock);
+		inode->i_state &= ~I_WILL_FREE;
+		inodes_stat.nr_unused--;
+		hlist_del_init(&inode->i_hash);
 	}
-
-	/* write_inode_now() ? */
-	inodes_stat.nr_unused--;
-	hlist_del_init(&inode->i_hash);
-out_truncate:
 	list_del_init(&inode->i_list);
 	list_del_init(&inode->i_sb_list);
 	inode->i_state |= I_FREEING;
@@ -277,13 +300,6 @@ out_truncate:
 	spin_unlock(&inode_lock);
 	if (inode->i_data.nrpages)
 		truncate_hugepages(&inode->i_data, 0);
-
-	if (sbinfo->free_inodes >= 0) {
-		spin_lock(&sbinfo->stat_lock);
-		sbinfo->free_inodes++;
-		spin_unlock(&sbinfo->stat_lock);
-	}
-
 	clear_inode(inode);
 	destroy_inode(inode);
 }
@@ -291,7 +307,7 @@ out_truncate:
 static void hugetlbfs_drop_inode(struct inode *inode)
 {
 	if (!inode->i_nlink)
-		hugetlbfs_delete_inode(inode);
+		generic_delete_inode(inode);
 	else
 		hugetlbfs_forget_inode(inode);
 }
@@ -308,7 +324,6 @@ hugetlb_vmtruncate_list(struct prio_tree_root *root, unsigned long h_pgoff)
 
 	vma_prio_tree_foreach(vma, &iter, root, h_pgoff, ULONG_MAX) {
 		unsigned long h_vm_pgoff;
-		unsigned long v_length;
 		unsigned long v_offset;
 
 		h_vm_pgoff = vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT);
@@ -319,11 +334,8 @@ hugetlb_vmtruncate_list(struct prio_tree_root *root, unsigned long h_pgoff)
 		if (h_vm_pgoff >= h_pgoff)
 			v_offset = 0;
 
-		v_length = vma->vm_end - vma->vm_start;
-
-		zap_hugepage_range(vma,
-				vma->vm_start + v_offset,
-				v_length - v_offset);
+		unmap_hugepage_range(vma,
+				vma->vm_start + v_offset, vma->vm_end);
 	}
 }
 
@@ -379,17 +391,6 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, uid_t uid,
 					gid_t gid, int mode, dev_t dev)
 {
 	struct inode *inode;
-	struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(sb);
-
-	if (sbinfo->free_inodes >= 0) {
-		spin_lock(&sbinfo->stat_lock);
-		if (!sbinfo->free_inodes) {
-			spin_unlock(&sbinfo->stat_lock);
-			return NULL;
-		}
-		sbinfo->free_inodes--;
-		spin_unlock(&sbinfo->stat_lock);
-	}
 
 	inode = new_inode(sb);
 	if (inode) {
@@ -531,29 +532,51 @@ static void hugetlbfs_put_super(struct super_block *sb)
 	}
 }
 
+static inline int hugetlbfs_dec_free_inodes(struct hugetlbfs_sb_info *sbinfo)
+{
+	if (sbinfo->free_inodes >= 0) {
+		spin_lock(&sbinfo->stat_lock);
+		if (unlikely(!sbinfo->free_inodes)) {
+			spin_unlock(&sbinfo->stat_lock);
+			return 0;
+		}
+		sbinfo->free_inodes--;
+		spin_unlock(&sbinfo->stat_lock);
+	}
+
+	return 1;
+}
+
+static void hugetlbfs_inc_free_inodes(struct hugetlbfs_sb_info *sbinfo)
+{
+	if (sbinfo->free_inodes >= 0) {
+		spin_lock(&sbinfo->stat_lock);
+		sbinfo->free_inodes++;
+		spin_unlock(&sbinfo->stat_lock);
+	}
+}
+
+
 static kmem_cache_t *hugetlbfs_inode_cachep;
 
 static struct inode *hugetlbfs_alloc_inode(struct super_block *sb)
 {
+	struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(sb);
 	struct hugetlbfs_inode_info *p;
 
+	if (unlikely(!hugetlbfs_dec_free_inodes(sbinfo)))
+		return NULL;
 	p = kmem_cache_alloc(hugetlbfs_inode_cachep, SLAB_KERNEL);
-	if (!p)
+	if (unlikely(!p)) {
+		hugetlbfs_inc_free_inodes(sbinfo);
 		return NULL;
+	}
 	return &p->vfs_inode;
 }
 
-static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags)
-{
-	struct hugetlbfs_inode_info *ei = (struct hugetlbfs_inode_info *)foo;
-
-	if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
-	    SLAB_CTOR_CONSTRUCTOR)
-		inode_init_once(&ei->vfs_inode);
-}
-
 static void hugetlbfs_destroy_inode(struct inode *inode)
 {
+	hugetlbfs_inc_free_inodes(HUGETLBFS_SB(inode->i_sb));
 	mpol_free_shared_policy(&HUGETLBFS_I(inode)->policy);
 	kmem_cache_free(hugetlbfs_inode_cachep, HUGETLBFS_I(inode));
 }
@@ -565,6 +588,16 @@ static struct address_space_operations hugetlbfs_aops = {
 	.set_page_dirty = hugetlbfs_set_page_dirty,
 };
 
+
+static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags)
+{
+	struct hugetlbfs_inode_info *ei = (struct hugetlbfs_inode_info *)foo;
+
+	if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
+	    SLAB_CTOR_CONSTRUCTOR)
+		inode_init_once(&ei->vfs_inode);
+}
+
 struct file_operations hugetlbfs_file_operations = {
 	.mmap = hugetlbfs_file_mmap,
 	.fsync = simple_sync_file,
@@ -592,6 +625,7 @@ static struct super_operations hugetlbfs_ops = {
 	.alloc_inode = hugetlbfs_alloc_inode,
 	.destroy_inode = hugetlbfs_destroy_inode,
 	.statfs = hugetlbfs_statfs,
+	.delete_inode = hugetlbfs_delete_inode,
 	.drop_inode = hugetlbfs_drop_inode,
 	.put_super = hugetlbfs_put_super,
 };
