aboutsummaryrefslogtreecommitdiffstats
path: root/fs/hugetlbfs/inode.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/hugetlbfs/inode.c')
-rw-r--r-- fs/hugetlbfs/inode.c 206
1 files changed, 120 insertions, 86 deletions
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 3a9b6d179cbd..e026c807e6b3 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -45,10 +45,58 @@ static struct backing_dev_info hugetlbfs_backing_dev_info = {
45 45
46int sysctl_hugetlb_shm_group; 46int sysctl_hugetlb_shm_group;
47 47
48static void huge_pagevec_release(struct pagevec *pvec)
49{
50 int i;
51
52 for (i = 0; i < pagevec_count(pvec); ++i)
53 put_page(pvec->pages[i]);
54
55 pagevec_reinit(pvec);
56}
57
58/*
59 * huge_pages_needed tries to determine the number of new huge pages that
60 * will be required to fully populate this VMA. This will be equal to
61 * the size of the VMA in huge pages minus the number of huge pages
62 * (covered by this VMA) that are found in the page cache.
63 *
64 * Result is in bytes to be compatible with is_hugepage_mem_enough()
65 */
66unsigned long
67huge_pages_needed(struct address_space *mapping, struct vm_area_struct *vma)
68{
69 int i;
70 struct pagevec pvec;
71 unsigned long start = vma->vm_start;
72 unsigned long end = vma->vm_end;
73 unsigned long hugepages = (end - start) >> HPAGE_SHIFT;
74 pgoff_t next = vma->vm_pgoff;
75 pgoff_t endpg = next + ((end - start) >> PAGE_SHIFT);
76
77 pagevec_init(&pvec, 0);
78 while (next < endpg) {
79 if (!pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE))
80 break;
81 for (i = 0; i < pagevec_count(&pvec); i++) {
82 struct page *page = pvec.pages[i];
83 if (page->index > next)
84 next = page->index;
85 if (page->index >= endpg)
86 break;
87 next++;
88 hugepages--;
89 }
90 huge_pagevec_release(&pvec);
91 }
92 return hugepages << HPAGE_SHIFT;
93}
94
48static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) 95static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
49{ 96{
50 struct inode *inode = file->f_dentry->d_inode; 97 struct inode *inode = file->f_dentry->d_inode;
51 struct address_space *mapping = inode->i_mapping; 98 struct address_space *mapping = inode->i_mapping;
99 unsigned long bytes;
52 loff_t len, vma_len; 100 loff_t len, vma_len;
53 int ret; 101 int ret;
54 102
@@ -67,6 +115,10 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
67 if (vma->vm_end - vma->vm_start < HPAGE_SIZE) 115 if (vma->vm_end - vma->vm_start < HPAGE_SIZE)
68 return -EINVAL; 116 return -EINVAL;
69 117
118 bytes = huge_pages_needed(mapping, vma);
119 if (!is_hugepage_mem_enough(bytes))
120 return -ENOMEM;
121
70 vma_len = (loff_t)(vma->vm_end - vma->vm_start); 122 vma_len = (loff_t)(vma->vm_end - vma->vm_start);
71 123
72 down(&inode->i_sem); 124 down(&inode->i_sem);
@@ -79,10 +131,8 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
79 if (!(vma->vm_flags & VM_WRITE) && len > inode->i_size) 131 if (!(vma->vm_flags & VM_WRITE) && len > inode->i_size)
80 goto out; 132 goto out;
81 133
82 ret = hugetlb_prefault(mapping, vma); 134 ret = 0;
83 if (ret) 135 hugetlb_prefault_arch_hook(vma->vm_mm);
84 goto out;
85
86 if (inode->i_size < len) 136 if (inode->i_size < len)
87 inode->i_size = len; 137 inode->i_size = len;
88out: 138out:
@@ -92,7 +142,7 @@ out:
92} 142}
93 143
94/* 144/*
95 * Called under down_write(mmap_sem), page_table_lock is not held 145 * Called under down_write(mmap_sem).
96 */ 146 */
97 147
98#ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA 148#ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA
@@ -171,16 +221,6 @@ static int hugetlbfs_commit_write(struct file *file,
171 return -EINVAL; 221 return -EINVAL;
172} 222}
173 223
174static void huge_pagevec_release(struct pagevec *pvec)
175{
176 int i;
177
178 for (i = 0; i < pagevec_count(pvec); ++i)
179 put_page(pvec->pages[i]);
180
181 pagevec_reinit(pvec);
182}
183
184static void truncate_huge_page(struct page *page) 224static void truncate_huge_page(struct page *page)
185{ 225{
186 clear_page_dirty(page); 226 clear_page_dirty(page);
@@ -224,52 +264,35 @@ static void truncate_hugepages(struct address_space *mapping, loff_t lstart)
224 264
225static void hugetlbfs_delete_inode(struct inode *inode) 265static void hugetlbfs_delete_inode(struct inode *inode)
226{ 266{
227 struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(inode->i_sb);
228
229 hlist_del_init(&inode->i_hash);
230 list_del_init(&inode->i_list);
231 list_del_init(&inode->i_sb_list);
232 inode->i_state |= I_FREEING;
233 inodes_stat.nr_inodes--;
234 spin_unlock(&inode_lock);
235
236 if (inode->i_data.nrpages) 267 if (inode->i_data.nrpages)
237 truncate_hugepages(&inode->i_data, 0); 268 truncate_hugepages(&inode->i_data, 0);
238
239 security_inode_delete(inode);
240
241 if (sbinfo->free_inodes >= 0) {
242 spin_lock(&sbinfo->stat_lock);
243 sbinfo->free_inodes++;
244 spin_unlock(&sbinfo->stat_lock);
245 }
246
247 clear_inode(inode); 269 clear_inode(inode);
248 destroy_inode(inode);
249} 270}
250 271
251static void hugetlbfs_forget_inode(struct inode *inode) 272static void hugetlbfs_forget_inode(struct inode *inode)
252{ 273{
253 struct super_block *super_block = inode->i_sb; 274 struct super_block *sb = inode->i_sb;
254 struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(super_block);
255 275
256 if (hlist_unhashed(&inode->i_hash)) 276 if (!hlist_unhashed(&inode->i_hash)) {
257 goto out_truncate; 277 if (!(inode->i_state & (I_DIRTY|I_LOCK)))
258 278 list_move(&inode->i_list, &inode_unused);
259 if (!(inode->i_state & (I_DIRTY|I_LOCK))) { 279 inodes_stat.nr_unused++;
260 list_del(&inode->i_list); 280 if (!sb || (sb->s_flags & MS_ACTIVE)) {
261 list_add(&inode->i_list, &inode_unused); 281 spin_unlock(&inode_lock);
262 } 282 return;
263 inodes_stat.nr_unused++; 283 }
264 if (!super_block || (super_block->s_flags & MS_ACTIVE)) { 284 inode->i_state |= I_WILL_FREE;
265 spin_unlock(&inode_lock); 285 spin_unlock(&inode_lock);
266 return; 286 /*
287 * write_inode_now is a noop as we set BDI_CAP_NO_WRITEBACK
288 * in our backing_dev_info.
289 */
290 write_inode_now(inode, 1);
291 spin_lock(&inode_lock);
292 inode->i_state &= ~I_WILL_FREE;
293 inodes_stat.nr_unused--;
294 hlist_del_init(&inode->i_hash);
267 } 295 }
268
269 /* write_inode_now() ? */
270 inodes_stat.nr_unused--;
271 hlist_del_init(&inode->i_hash);
272out_truncate:
273 list_del_init(&inode->i_list); 296 list_del_init(&inode->i_list);
274 list_del_init(&inode->i_sb_list); 297 list_del_init(&inode->i_sb_list);
275 inode->i_state |= I_FREEING; 298 inode->i_state |= I_FREEING;
@@ -277,13 +300,6 @@ out_truncate:
277 spin_unlock(&inode_lock); 300 spin_unlock(&inode_lock);
278 if (inode->i_data.nrpages) 301 if (inode->i_data.nrpages)
279 truncate_hugepages(&inode->i_data, 0); 302 truncate_hugepages(&inode->i_data, 0);
280
281 if (sbinfo->free_inodes >= 0) {
282 spin_lock(&sbinfo->stat_lock);
283 sbinfo->free_inodes++;
284 spin_unlock(&sbinfo->stat_lock);
285 }
286
287 clear_inode(inode); 303 clear_inode(inode);
288 destroy_inode(inode); 304 destroy_inode(inode);
289} 305}
@@ -291,7 +307,7 @@ out_truncate:
291static void hugetlbfs_drop_inode(struct inode *inode) 307static void hugetlbfs_drop_inode(struct inode *inode)
292{ 308{
293 if (!inode->i_nlink) 309 if (!inode->i_nlink)
294 hugetlbfs_delete_inode(inode); 310 generic_delete_inode(inode);
295 else 311 else
296 hugetlbfs_forget_inode(inode); 312 hugetlbfs_forget_inode(inode);
297} 313}
@@ -308,7 +324,6 @@ hugetlb_vmtruncate_list(struct prio_tree_root *root, unsigned long h_pgoff)
308 324
309 vma_prio_tree_foreach(vma, &iter, root, h_pgoff, ULONG_MAX) { 325 vma_prio_tree_foreach(vma, &iter, root, h_pgoff, ULONG_MAX) {
310 unsigned long h_vm_pgoff; 326 unsigned long h_vm_pgoff;
311 unsigned long v_length;
312 unsigned long v_offset; 327 unsigned long v_offset;
313 328
314 h_vm_pgoff = vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT); 329 h_vm_pgoff = vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT);
@@ -319,11 +334,8 @@ hugetlb_vmtruncate_list(struct prio_tree_root *root, unsigned long h_pgoff)
319 if (h_vm_pgoff >= h_pgoff) 334 if (h_vm_pgoff >= h_pgoff)
320 v_offset = 0; 335 v_offset = 0;
321 336
322 v_length = vma->vm_end - vma->vm_start; 337 unmap_hugepage_range(vma,
323 338 vma->vm_start + v_offset, vma->vm_end);
324 zap_hugepage_range(vma,
325 vma->vm_start + v_offset,
326 v_length - v_offset);
327 } 339 }
328} 340}
329 341
@@ -379,17 +391,6 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, uid_t uid,
379 gid_t gid, int mode, dev_t dev) 391 gid_t gid, int mode, dev_t dev)
380{ 392{
381 struct inode *inode; 393 struct inode *inode;
382 struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(sb);
383
384 if (sbinfo->free_inodes >= 0) {
385 spin_lock(&sbinfo->stat_lock);
386 if (!sbinfo->free_inodes) {
387 spin_unlock(&sbinfo->stat_lock);
388 return NULL;
389 }
390 sbinfo->free_inodes--;
391 spin_unlock(&sbinfo->stat_lock);
392 }
393 394
394 inode = new_inode(sb); 395 inode = new_inode(sb);
395 if (inode) { 396 if (inode) {
@@ -531,29 +532,51 @@ static void hugetlbfs_put_super(struct super_block *sb)
531 } 532 }
532} 533}
533 534
535static inline int hugetlbfs_dec_free_inodes(struct hugetlbfs_sb_info *sbinfo)
536{
537 if (sbinfo->free_inodes >= 0) {
538 spin_lock(&sbinfo->stat_lock);
539 if (unlikely(!sbinfo->free_inodes)) {
540 spin_unlock(&sbinfo->stat_lock);
541 return 0;
542 }
543 sbinfo->free_inodes--;
544 spin_unlock(&sbinfo->stat_lock);
545 }
546
547 return 1;
548}
549
550static void hugetlbfs_inc_free_inodes(struct hugetlbfs_sb_info *sbinfo)
551{
552 if (sbinfo->free_inodes >= 0) {
553 spin_lock(&sbinfo->stat_lock);
554 sbinfo->free_inodes++;
555 spin_unlock(&sbinfo->stat_lock);
556 }
557}
558
559
534static kmem_cache_t *hugetlbfs_inode_cachep; 560static kmem_cache_t *hugetlbfs_inode_cachep;
535 561
536static struct inode *hugetlbfs_alloc_inode(struct super_block *sb) 562static struct inode *hugetlbfs_alloc_inode(struct super_block *sb)
537{ 563{
564 struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(sb);
538 struct hugetlbfs_inode_info *p; 565 struct hugetlbfs_inode_info *p;
539 566
567 if (unlikely(!hugetlbfs_dec_free_inodes(sbinfo)))
568 return NULL;
540 p = kmem_cache_alloc(hugetlbfs_inode_cachep, SLAB_KERNEL); 569 p = kmem_cache_alloc(hugetlbfs_inode_cachep, SLAB_KERNEL);
541 if (!p) 570 if (unlikely(!p)) {
571 hugetlbfs_inc_free_inodes(sbinfo);
542 return NULL; 572 return NULL;
573 }
543 return &p->vfs_inode; 574 return &p->vfs_inode;
544} 575}
545 576
546static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags)
547{
548 struct hugetlbfs_inode_info *ei = (struct hugetlbfs_inode_info *)foo;
549
550 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
551 SLAB_CTOR_CONSTRUCTOR)
552 inode_init_once(&ei->vfs_inode);
553}
554
555static void hugetlbfs_destroy_inode(struct inode *inode) 577static void hugetlbfs_destroy_inode(struct inode *inode)
556{ 578{
579 hugetlbfs_inc_free_inodes(HUGETLBFS_SB(inode->i_sb));
557 mpol_free_shared_policy(&HUGETLBFS_I(inode)->policy); 580 mpol_free_shared_policy(&HUGETLBFS_I(inode)->policy);
558 kmem_cache_free(hugetlbfs_inode_cachep, HUGETLBFS_I(inode)); 581 kmem_cache_free(hugetlbfs_inode_cachep, HUGETLBFS_I(inode));
559} 582}
@@ -565,6 +588,16 @@ static struct address_space_operations hugetlbfs_aops = {
565 .set_page_dirty = hugetlbfs_set_page_dirty, 588 .set_page_dirty = hugetlbfs_set_page_dirty,
566}; 589};
567 590
591
592static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags)
593{
594 struct hugetlbfs_inode_info *ei = (struct hugetlbfs_inode_info *)foo;
595
596 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
597 SLAB_CTOR_CONSTRUCTOR)
598 inode_init_once(&ei->vfs_inode);
599}
600
568struct file_operations hugetlbfs_file_operations = { 601struct file_operations hugetlbfs_file_operations = {
569 .mmap = hugetlbfs_file_mmap, 602 .mmap = hugetlbfs_file_mmap,
570 .fsync = simple_sync_file, 603 .fsync = simple_sync_file,
@@ -592,6 +625,7 @@ static struct super_operations hugetlbfs_ops = {
592 .alloc_inode = hugetlbfs_alloc_inode, 625 .alloc_inode = hugetlbfs_alloc_inode,
593 .destroy_inode = hugetlbfs_destroy_inode, 626 .destroy_inode = hugetlbfs_destroy_inode,
594 .statfs = hugetlbfs_statfs, 627 .statfs = hugetlbfs_statfs,
628 .delete_inode = hugetlbfs_delete_inode,
595 .drop_inode = hugetlbfs_drop_inode, 629 .drop_inode = hugetlbfs_drop_inode,
596 .put_super = hugetlbfs_put_super, 630 .put_super = hugetlbfs_put_super,
597}; 631};