diff options
Diffstat (limited to 'fs/hugetlbfs/inode.c')
-rw-r--r-- | fs/hugetlbfs/inode.c | 206 |
1 files changed, 120 insertions, 86 deletions
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 3a9b6d179cbd..e026c807e6b3 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c | |||
@@ -45,10 +45,58 @@ static struct backing_dev_info hugetlbfs_backing_dev_info = { | |||
45 | 45 | ||
46 | int sysctl_hugetlb_shm_group; | 46 | int sysctl_hugetlb_shm_group; |
47 | 47 | ||
48 | static void huge_pagevec_release(struct pagevec *pvec) | ||
49 | { | ||
50 | int i; | ||
51 | |||
52 | for (i = 0; i < pagevec_count(pvec); ++i) | ||
53 | put_page(pvec->pages[i]); | ||
54 | |||
55 | pagevec_reinit(pvec); | ||
56 | } | ||
57 | |||
58 | /* | ||
59 | * huge_pages_needed tries to determine the number of new huge pages that | ||
60 | * will be required to fully populate this VMA. This will be equal to | ||
61 | * the size of the VMA in huge pages minus the number of huge pages | ||
62 | * (covered by this VMA) that are found in the page cache. | ||
63 | * | ||
64 | * Result is in bytes to be compatible with is_hugepage_mem_enough() | ||
65 | */ | ||
66 | unsigned long | ||
67 | huge_pages_needed(struct address_space *mapping, struct vm_area_struct *vma) | ||
68 | { | ||
69 | int i; | ||
70 | struct pagevec pvec; | ||
71 | unsigned long start = vma->vm_start; | ||
72 | unsigned long end = vma->vm_end; | ||
73 | unsigned long hugepages = (end - start) >> HPAGE_SHIFT; | ||
74 | pgoff_t next = vma->vm_pgoff; | ||
75 | pgoff_t endpg = next + ((end - start) >> PAGE_SHIFT); | ||
76 | |||
77 | pagevec_init(&pvec, 0); | ||
78 | while (next < endpg) { | ||
79 | if (!pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) | ||
80 | break; | ||
81 | for (i = 0; i < pagevec_count(&pvec); i++) { | ||
82 | struct page *page = pvec.pages[i]; | ||
83 | if (page->index > next) | ||
84 | next = page->index; | ||
85 | if (page->index >= endpg) | ||
86 | break; | ||
87 | next++; | ||
88 | hugepages--; | ||
89 | } | ||
90 | huge_pagevec_release(&pvec); | ||
91 | } | ||
92 | return hugepages << HPAGE_SHIFT; | ||
93 | } | ||
94 | |||
48 | static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) | 95 | static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) |
49 | { | 96 | { |
50 | struct inode *inode = file->f_dentry->d_inode; | 97 | struct inode *inode = file->f_dentry->d_inode; |
51 | struct address_space *mapping = inode->i_mapping; | 98 | struct address_space *mapping = inode->i_mapping; |
99 | unsigned long bytes; | ||
52 | loff_t len, vma_len; | 100 | loff_t len, vma_len; |
53 | int ret; | 101 | int ret; |
54 | 102 | ||
@@ -67,6 +115,10 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) | |||
67 | if (vma->vm_end - vma->vm_start < HPAGE_SIZE) | 115 | if (vma->vm_end - vma->vm_start < HPAGE_SIZE) |
68 | return -EINVAL; | 116 | return -EINVAL; |
69 | 117 | ||
118 | bytes = huge_pages_needed(mapping, vma); | ||
119 | if (!is_hugepage_mem_enough(bytes)) | ||
120 | return -ENOMEM; | ||
121 | |||
70 | vma_len = (loff_t)(vma->vm_end - vma->vm_start); | 122 | vma_len = (loff_t)(vma->vm_end - vma->vm_start); |
71 | 123 | ||
72 | down(&inode->i_sem); | 124 | down(&inode->i_sem); |
@@ -79,10 +131,8 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) | |||
79 | if (!(vma->vm_flags & VM_WRITE) && len > inode->i_size) | 131 | if (!(vma->vm_flags & VM_WRITE) && len > inode->i_size) |
80 | goto out; | 132 | goto out; |
81 | 133 | ||
82 | ret = hugetlb_prefault(mapping, vma); | 134 | ret = 0; |
83 | if (ret) | 135 | hugetlb_prefault_arch_hook(vma->vm_mm); |
84 | goto out; | ||
85 | |||
86 | if (inode->i_size < len) | 136 | if (inode->i_size < len) |
87 | inode->i_size = len; | 137 | inode->i_size = len; |
88 | out: | 138 | out: |
@@ -92,7 +142,7 @@ out: | |||
92 | } | 142 | } |
93 | 143 | ||
94 | /* | 144 | /* |
95 | * Called under down_write(mmap_sem), page_table_lock is not held | 145 | * Called under down_write(mmap_sem). |
96 | */ | 146 | */ |
97 | 147 | ||
98 | #ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA | 148 | #ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA |
@@ -171,16 +221,6 @@ static int hugetlbfs_commit_write(struct file *file, | |||
171 | return -EINVAL; | 221 | return -EINVAL; |
172 | } | 222 | } |
173 | 223 | ||
174 | static void huge_pagevec_release(struct pagevec *pvec) | ||
175 | { | ||
176 | int i; | ||
177 | |||
178 | for (i = 0; i < pagevec_count(pvec); ++i) | ||
179 | put_page(pvec->pages[i]); | ||
180 | |||
181 | pagevec_reinit(pvec); | ||
182 | } | ||
183 | |||
184 | static void truncate_huge_page(struct page *page) | 224 | static void truncate_huge_page(struct page *page) |
185 | { | 225 | { |
186 | clear_page_dirty(page); | 226 | clear_page_dirty(page); |
@@ -224,52 +264,35 @@ static void truncate_hugepages(struct address_space *mapping, loff_t lstart) | |||
224 | 264 | ||
225 | static void hugetlbfs_delete_inode(struct inode *inode) | 265 | static void hugetlbfs_delete_inode(struct inode *inode) |
226 | { | 266 | { |
227 | struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(inode->i_sb); | ||
228 | |||
229 | hlist_del_init(&inode->i_hash); | ||
230 | list_del_init(&inode->i_list); | ||
231 | list_del_init(&inode->i_sb_list); | ||
232 | inode->i_state |= I_FREEING; | ||
233 | inodes_stat.nr_inodes--; | ||
234 | spin_unlock(&inode_lock); | ||
235 | |||
236 | if (inode->i_data.nrpages) | 267 | if (inode->i_data.nrpages) |
237 | truncate_hugepages(&inode->i_data, 0); | 268 | truncate_hugepages(&inode->i_data, 0); |
238 | |||
239 | security_inode_delete(inode); | ||
240 | |||
241 | if (sbinfo->free_inodes >= 0) { | ||
242 | spin_lock(&sbinfo->stat_lock); | ||
243 | sbinfo->free_inodes++; | ||
244 | spin_unlock(&sbinfo->stat_lock); | ||
245 | } | ||
246 | |||
247 | clear_inode(inode); | 269 | clear_inode(inode); |
248 | destroy_inode(inode); | ||
249 | } | 270 | } |
250 | 271 | ||
251 | static void hugetlbfs_forget_inode(struct inode *inode) | 272 | static void hugetlbfs_forget_inode(struct inode *inode) |
252 | { | 273 | { |
253 | struct super_block *super_block = inode->i_sb; | 274 | struct super_block *sb = inode->i_sb; |
254 | struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(super_block); | ||
255 | 275 | ||
256 | if (hlist_unhashed(&inode->i_hash)) | 276 | if (!hlist_unhashed(&inode->i_hash)) { |
257 | goto out_truncate; | 277 | if (!(inode->i_state & (I_DIRTY|I_LOCK))) |
258 | 278 | list_move(&inode->i_list, &inode_unused); | |
259 | if (!(inode->i_state & (I_DIRTY|I_LOCK))) { | 279 | inodes_stat.nr_unused++; |
260 | list_del(&inode->i_list); | 280 | if (!sb || (sb->s_flags & MS_ACTIVE)) { |
261 | list_add(&inode->i_list, &inode_unused); | 281 | spin_unlock(&inode_lock); |
262 | } | 282 | return; |
263 | inodes_stat.nr_unused++; | 283 | } |
264 | if (!super_block || (super_block->s_flags & MS_ACTIVE)) { | 284 | inode->i_state |= I_WILL_FREE; |
265 | spin_unlock(&inode_lock); | 285 | spin_unlock(&inode_lock); |
266 | return; | 286 | /* |
287 | * write_inode_now is a noop as we set BDI_CAP_NO_WRITEBACK | ||
288 | * in our backing_dev_info. | ||
289 | */ | ||
290 | write_inode_now(inode, 1); | ||
291 | spin_lock(&inode_lock); | ||
292 | inode->i_state &= ~I_WILL_FREE; | ||
293 | inodes_stat.nr_unused--; | ||
294 | hlist_del_init(&inode->i_hash); | ||
267 | } | 295 | } |
268 | |||
269 | /* write_inode_now() ? */ | ||
270 | inodes_stat.nr_unused--; | ||
271 | hlist_del_init(&inode->i_hash); | ||
272 | out_truncate: | ||
273 | list_del_init(&inode->i_list); | 296 | list_del_init(&inode->i_list); |
274 | list_del_init(&inode->i_sb_list); | 297 | list_del_init(&inode->i_sb_list); |
275 | inode->i_state |= I_FREEING; | 298 | inode->i_state |= I_FREEING; |
@@ -277,13 +300,6 @@ out_truncate: | |||
277 | spin_unlock(&inode_lock); | 300 | spin_unlock(&inode_lock); |
278 | if (inode->i_data.nrpages) | 301 | if (inode->i_data.nrpages) |
279 | truncate_hugepages(&inode->i_data, 0); | 302 | truncate_hugepages(&inode->i_data, 0); |
280 | |||
281 | if (sbinfo->free_inodes >= 0) { | ||
282 | spin_lock(&sbinfo->stat_lock); | ||
283 | sbinfo->free_inodes++; | ||
284 | spin_unlock(&sbinfo->stat_lock); | ||
285 | } | ||
286 | |||
287 | clear_inode(inode); | 303 | clear_inode(inode); |
288 | destroy_inode(inode); | 304 | destroy_inode(inode); |
289 | } | 305 | } |
@@ -291,7 +307,7 @@ out_truncate: | |||
291 | static void hugetlbfs_drop_inode(struct inode *inode) | 307 | static void hugetlbfs_drop_inode(struct inode *inode) |
292 | { | 308 | { |
293 | if (!inode->i_nlink) | 309 | if (!inode->i_nlink) |
294 | hugetlbfs_delete_inode(inode); | 310 | generic_delete_inode(inode); |
295 | else | 311 | else |
296 | hugetlbfs_forget_inode(inode); | 312 | hugetlbfs_forget_inode(inode); |
297 | } | 313 | } |
@@ -308,7 +324,6 @@ hugetlb_vmtruncate_list(struct prio_tree_root *root, unsigned long h_pgoff) | |||
308 | 324 | ||
309 | vma_prio_tree_foreach(vma, &iter, root, h_pgoff, ULONG_MAX) { | 325 | vma_prio_tree_foreach(vma, &iter, root, h_pgoff, ULONG_MAX) { |
310 | unsigned long h_vm_pgoff; | 326 | unsigned long h_vm_pgoff; |
311 | unsigned long v_length; | ||
312 | unsigned long v_offset; | 327 | unsigned long v_offset; |
313 | 328 | ||
314 | h_vm_pgoff = vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT); | 329 | h_vm_pgoff = vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT); |
@@ -319,11 +334,8 @@ hugetlb_vmtruncate_list(struct prio_tree_root *root, unsigned long h_pgoff) | |||
319 | if (h_vm_pgoff >= h_pgoff) | 334 | if (h_vm_pgoff >= h_pgoff) |
320 | v_offset = 0; | 335 | v_offset = 0; |
321 | 336 | ||
322 | v_length = vma->vm_end - vma->vm_start; | 337 | unmap_hugepage_range(vma, |
323 | 338 | vma->vm_start + v_offset, vma->vm_end); | |
324 | zap_hugepage_range(vma, | ||
325 | vma->vm_start + v_offset, | ||
326 | v_length - v_offset); | ||
327 | } | 339 | } |
328 | } | 340 | } |
329 | 341 | ||
@@ -379,17 +391,6 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, uid_t uid, | |||
379 | gid_t gid, int mode, dev_t dev) | 391 | gid_t gid, int mode, dev_t dev) |
380 | { | 392 | { |
381 | struct inode *inode; | 393 | struct inode *inode; |
382 | struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(sb); | ||
383 | |||
384 | if (sbinfo->free_inodes >= 0) { | ||
385 | spin_lock(&sbinfo->stat_lock); | ||
386 | if (!sbinfo->free_inodes) { | ||
387 | spin_unlock(&sbinfo->stat_lock); | ||
388 | return NULL; | ||
389 | } | ||
390 | sbinfo->free_inodes--; | ||
391 | spin_unlock(&sbinfo->stat_lock); | ||
392 | } | ||
393 | 394 | ||
394 | inode = new_inode(sb); | 395 | inode = new_inode(sb); |
395 | if (inode) { | 396 | if (inode) { |
@@ -531,29 +532,51 @@ static void hugetlbfs_put_super(struct super_block *sb) | |||
531 | } | 532 | } |
532 | } | 533 | } |
533 | 534 | ||
535 | static inline int hugetlbfs_dec_free_inodes(struct hugetlbfs_sb_info *sbinfo) | ||
536 | { | ||
537 | if (sbinfo->free_inodes >= 0) { | ||
538 | spin_lock(&sbinfo->stat_lock); | ||
539 | if (unlikely(!sbinfo->free_inodes)) { | ||
540 | spin_unlock(&sbinfo->stat_lock); | ||
541 | return 0; | ||
542 | } | ||
543 | sbinfo->free_inodes--; | ||
544 | spin_unlock(&sbinfo->stat_lock); | ||
545 | } | ||
546 | |||
547 | return 1; | ||
548 | } | ||
549 | |||
550 | static void hugetlbfs_inc_free_inodes(struct hugetlbfs_sb_info *sbinfo) | ||
551 | { | ||
552 | if (sbinfo->free_inodes >= 0) { | ||
553 | spin_lock(&sbinfo->stat_lock); | ||
554 | sbinfo->free_inodes++; | ||
555 | spin_unlock(&sbinfo->stat_lock); | ||
556 | } | ||
557 | } | ||
558 | |||
559 | |||
534 | static kmem_cache_t *hugetlbfs_inode_cachep; | 560 | static kmem_cache_t *hugetlbfs_inode_cachep; |
535 | 561 | ||
536 | static struct inode *hugetlbfs_alloc_inode(struct super_block *sb) | 562 | static struct inode *hugetlbfs_alloc_inode(struct super_block *sb) |
537 | { | 563 | { |
564 | struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(sb); | ||
538 | struct hugetlbfs_inode_info *p; | 565 | struct hugetlbfs_inode_info *p; |
539 | 566 | ||
567 | if (unlikely(!hugetlbfs_dec_free_inodes(sbinfo))) | ||
568 | return NULL; | ||
540 | p = kmem_cache_alloc(hugetlbfs_inode_cachep, SLAB_KERNEL); | 569 | p = kmem_cache_alloc(hugetlbfs_inode_cachep, SLAB_KERNEL); |
541 | if (!p) | 570 | if (unlikely(!p)) { |
571 | hugetlbfs_inc_free_inodes(sbinfo); | ||
542 | return NULL; | 572 | return NULL; |
573 | } | ||
543 | return &p->vfs_inode; | 574 | return &p->vfs_inode; |
544 | } | 575 | } |
545 | 576 | ||
546 | static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags) | ||
547 | { | ||
548 | struct hugetlbfs_inode_info *ei = (struct hugetlbfs_inode_info *)foo; | ||
549 | |||
550 | if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == | ||
551 | SLAB_CTOR_CONSTRUCTOR) | ||
552 | inode_init_once(&ei->vfs_inode); | ||
553 | } | ||
554 | |||
555 | static void hugetlbfs_destroy_inode(struct inode *inode) | 577 | static void hugetlbfs_destroy_inode(struct inode *inode) |
556 | { | 578 | { |
579 | hugetlbfs_inc_free_inodes(HUGETLBFS_SB(inode->i_sb)); | ||
557 | mpol_free_shared_policy(&HUGETLBFS_I(inode)->policy); | 580 | mpol_free_shared_policy(&HUGETLBFS_I(inode)->policy); |
558 | kmem_cache_free(hugetlbfs_inode_cachep, HUGETLBFS_I(inode)); | 581 | kmem_cache_free(hugetlbfs_inode_cachep, HUGETLBFS_I(inode)); |
559 | } | 582 | } |
@@ -565,6 +588,16 @@ static struct address_space_operations hugetlbfs_aops = { | |||
565 | .set_page_dirty = hugetlbfs_set_page_dirty, | 588 | .set_page_dirty = hugetlbfs_set_page_dirty, |
566 | }; | 589 | }; |
567 | 590 | ||
591 | |||
592 | static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags) | ||
593 | { | ||
594 | struct hugetlbfs_inode_info *ei = (struct hugetlbfs_inode_info *)foo; | ||
595 | |||
596 | if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == | ||
597 | SLAB_CTOR_CONSTRUCTOR) | ||
598 | inode_init_once(&ei->vfs_inode); | ||
599 | } | ||
600 | |||
568 | struct file_operations hugetlbfs_file_operations = { | 601 | struct file_operations hugetlbfs_file_operations = { |
569 | .mmap = hugetlbfs_file_mmap, | 602 | .mmap = hugetlbfs_file_mmap, |
570 | .fsync = simple_sync_file, | 603 | .fsync = simple_sync_file, |
@@ -592,6 +625,7 @@ static struct super_operations hugetlbfs_ops = { | |||
592 | .alloc_inode = hugetlbfs_alloc_inode, | 625 | .alloc_inode = hugetlbfs_alloc_inode, |
593 | .destroy_inode = hugetlbfs_destroy_inode, | 626 | .destroy_inode = hugetlbfs_destroy_inode, |
594 | .statfs = hugetlbfs_statfs, | 627 | .statfs = hugetlbfs_statfs, |
628 | .delete_inode = hugetlbfs_delete_inode, | ||
595 | .drop_inode = hugetlbfs_drop_inode, | 629 | .drop_inode = hugetlbfs_drop_inode, |
596 | .put_super = hugetlbfs_put_super, | 630 | .put_super = hugetlbfs_put_super, |
597 | }; | 631 | }; |