about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: npiggin@suse.de <npiggin@suse.de> 2009-08-20 12:35:05 -0400
committer: al <al@dizzy.pdmi.ras.ru> 2009-09-24 08:41:47 -0400
commit: 25d9e2d15286281ec834b829a4aaf8969011f1cd (patch)
tree: e4329a481ca197afae30f04335e023c7d04f7d67
parent: eca6f534e61919b28fb21aafbd1c2983deae75be (diff)
truncate: new helpers
Introduce new truncate helpers truncate_pagecache and inode_newsize_ok.
vmtruncate is also consolidated from mm/memory.c and mm/nommu.c into
mm/truncate.c.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
-rw-r--r-- Documentation/vm/locking 2
-rw-r--r-- fs/attr.c 46
-rw-r--r-- include/linux/fs.h 3
-rw-r--r-- include/linux/mm.h 5
-rw-r--r-- mm/filemap.c 2
-rw-r--r-- mm/memory.c 62
-rw-r--r-- mm/mremap.c 4
-rw-r--r-- mm/nommu.c 40
-rw-r--r-- mm/truncate.c 64
9 files changed, 120 insertions, 108 deletions
diff --git a/Documentation/vm/locking b/Documentation/vm/locking
index f366fa956179..25fadb448760 100644
--- a/Documentation/vm/locking
+++ b/Documentation/vm/locking
@@ -80,7 +80,7 @@ Note: PTL can also be used to guarantee that no new clones using the
80mm start up ... this is a loose form of stability on mm_users. For 80mm start up ... this is a loose form of stability on mm_users. For
81example, it is used in copy_mm to protect against a racing tlb_gather_mmu 81example, it is used in copy_mm to protect against a racing tlb_gather_mmu
82single address space optimization, so that the zap_page_range (from 82single address space optimization, so that the zap_page_range (from
83vmtruncate) does not lose sending ipi's to cloned threads that might 83truncate) does not lose sending ipi's to cloned threads that might
84be spawned underneath it and go to user mode to drag in pte's into tlbs. 84be spawned underneath it and go to user mode to drag in pte's into tlbs.
85 85
86swap_lock 86swap_lock
diff --git a/fs/attr.c b/fs/attr.c
index 9fe1b1bd30a8..96d394bdaddf 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -18,7 +18,7 @@
18/* Taken over from the old code... */ 18/* Taken over from the old code... */
19 19
20/* POSIX UID/GID verification for setting inode attributes. */ 20/* POSIX UID/GID verification for setting inode attributes. */
21int inode_change_ok(struct inode *inode, struct iattr *attr) 21int inode_change_ok(const struct inode *inode, struct iattr *attr)
22{ 22{
23 int retval = -EPERM; 23 int retval = -EPERM;
24 unsigned int ia_valid = attr->ia_valid; 24 unsigned int ia_valid = attr->ia_valid;
@@ -60,9 +60,51 @@ fine:
60error: 60error:
61 return retval; 61 return retval;
62} 62}
63
64EXPORT_SYMBOL(inode_change_ok); 63EXPORT_SYMBOL(inode_change_ok);
65 64
65/**
66 * inode_newsize_ok - may this inode be truncated to a given size
67 * @inode: the inode to be truncated
68 * @offset: the new size to assign to the inode
69 * @Returns: 0 on success, -ve errno on failure
70 *
71 * inode_newsize_ok will check filesystem limits and ulimits to check that the
72 * new inode size is within limits. inode_newsize_ok will also send SIGXFSZ
73 * when necessary. Caller must not proceed with inode size change if failure is
74 * returned. @inode must be a file (not directory), with appropriate
75 * permissions to allow truncate (inode_newsize_ok does NOT check these
76 * conditions).
77 *
78 * inode_newsize_ok must be called with i_mutex held.
79 */
80int inode_newsize_ok(const struct inode *inode, loff_t offset)
81{
82 if (inode->i_size < offset) {
83 unsigned long limit;
84
85 limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
86 if (limit != RLIM_INFINITY && offset > limit)
87 goto out_sig;
88 if (offset > inode->i_sb->s_maxbytes)
89 goto out_big;
90 } else {
91 /*
92 * truncation of in-use swapfiles is disallowed - it would
93 * cause subsequent swapout to scribble on the now-freed
94 * blocks.
95 */
96 if (IS_SWAPFILE(inode))
97 return -ETXTBSY;
98 }
99
100 return 0;
101out_sig:
102 send_sig(SIGXFSZ, current, 0);
103out_big:
104 return -EFBIG;
105}
106EXPORT_SYMBOL(inode_newsize_ok);
107
66int inode_setattr(struct inode * inode, struct iattr * attr) 108int inode_setattr(struct inode * inode, struct iattr * attr)
67{ 109{
68 unsigned int ia_valid = attr->ia_valid; 110 unsigned int ia_valid = attr->ia_valid;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 502d96ef345d..2b08b5ce09b6 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2382,7 +2382,8 @@ extern int buffer_migrate_page(struct address_space *,
2382#define buffer_migrate_page NULL 2382#define buffer_migrate_page NULL
2383#endif 2383#endif
2384 2384
2385extern int inode_change_ok(struct inode *, struct iattr *); 2385extern int inode_change_ok(const struct inode *, struct iattr *);
2386extern int inode_newsize_ok(const struct inode *, loff_t offset);
2386extern int __must_check inode_setattr(struct inode *, struct iattr *); 2387extern int __must_check inode_setattr(struct inode *, struct iattr *);
2387 2388
2388extern void file_update_time(struct file *file); 2389extern void file_update_time(struct file *file);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index b6eae5e3144b..8347e938fb2f 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -791,8 +791,9 @@ static inline void unmap_shared_mapping_range(struct address_space *mapping,
791 unmap_mapping_range(mapping, holebegin, holelen, 0); 791 unmap_mapping_range(mapping, holebegin, holelen, 0);
792} 792}
793 793
794extern int vmtruncate(struct inode * inode, loff_t offset); 794extern void truncate_pagecache(struct inode *inode, loff_t old, loff_t new);
795extern int vmtruncate_range(struct inode * inode, loff_t offset, loff_t end); 795extern int vmtruncate(struct inode *inode, loff_t offset);
796extern int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end);
796 797
797#ifdef CONFIG_MMU 798#ifdef CONFIG_MMU
798extern int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, 799extern int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
diff --git a/mm/filemap.c b/mm/filemap.c
index bcc7372aebbc..33349adb227a 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -58,7 +58,7 @@
58/* 58/*
59 * Lock ordering: 59 * Lock ordering:
60 * 60 *
61 * ->i_mmap_lock (vmtruncate) 61 * ->i_mmap_lock (truncate_pagecache)
62 * ->private_lock (__free_pte->__set_page_dirty_buffers) 62 * ->private_lock (__free_pte->__set_page_dirty_buffers)
63 * ->swap_lock (exclusive_swap_page, others) 63 * ->swap_lock (exclusive_swap_page, others)
64 * ->mapping->tree_lock 64 * ->mapping->tree_lock
diff --git a/mm/memory.c b/mm/memory.c
index b1443ac07c00..ebcd3decac89 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -297,7 +297,8 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma,
297 unsigned long addr = vma->vm_start; 297 unsigned long addr = vma->vm_start;
298 298
299 /* 299 /*
300 * Hide vma from rmap and vmtruncate before freeing pgtables 300 * Hide vma from rmap and truncate_pagecache before freeing
301 * pgtables
301 */ 302 */
302 anon_vma_unlink(vma); 303 anon_vma_unlink(vma);
303 unlink_file_vma(vma); 304 unlink_file_vma(vma);
@@ -2407,7 +2408,7 @@ restart:
2407 * @mapping: the address space containing mmaps to be unmapped. 2408 * @mapping: the address space containing mmaps to be unmapped.
2408 * @holebegin: byte in first page to unmap, relative to the start of 2409 * @holebegin: byte in first page to unmap, relative to the start of
2409 * the underlying file. This will be rounded down to a PAGE_SIZE 2410 * the underlying file. This will be rounded down to a PAGE_SIZE
2410 * boundary. Note that this is different from vmtruncate(), which 2411 * boundary. Note that this is different from truncate_pagecache(), which
2411 * must keep the partial page. In contrast, we must get rid of 2412 * must keep the partial page. In contrast, we must get rid of
2412 * partial pages. 2413 * partial pages.
2413 * @holelen: size of prospective hole in bytes. This will be rounded 2414 * @holelen: size of prospective hole in bytes. This will be rounded
@@ -2458,63 +2459,6 @@ void unmap_mapping_range(struct address_space *mapping,
2458} 2459}
2459EXPORT_SYMBOL(unmap_mapping_range); 2460EXPORT_SYMBOL(unmap_mapping_range);
2460 2461
2461/**
2462 * vmtruncate - unmap mappings "freed" by truncate() syscall
2463 * @inode: inode of the file used
2464 * @offset: file offset to start truncating
2465 *
2466 * NOTE! We have to be ready to update the memory sharing
2467 * between the file and the memory map for a potential last
2468 * incomplete page. Ugly, but necessary.
2469 */
2470int vmtruncate(struct inode * inode, loff_t offset)
2471{
2472 if (inode->i_size < offset) {
2473 unsigned long limit;
2474
2475 limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
2476 if (limit != RLIM_INFINITY && offset > limit)
2477 goto out_sig;
2478 if (offset > inode->i_sb->s_maxbytes)
2479 goto out_big;
2480 i_size_write(inode, offset);
2481 } else {
2482 struct address_space *mapping = inode->i_mapping;
2483
2484 /*
2485 * truncation of in-use swapfiles is disallowed - it would
2486 * cause subsequent swapout to scribble on the now-freed
2487 * blocks.
2488 */
2489 if (IS_SWAPFILE(inode))
2490 return -ETXTBSY;
2491 i_size_write(inode, offset);
2492
2493 /*
2494 * unmap_mapping_range is called twice, first simply for
2495 * efficiency so that truncate_inode_pages does fewer
2496 * single-page unmaps. However after this first call, and
2497 * before truncate_inode_pages finishes, it is possible for
2498 * private pages to be COWed, which remain after
2499 * truncate_inode_pages finishes, hence the second
2500 * unmap_mapping_range call must be made for correctness.
2501 */
2502 unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
2503 truncate_inode_pages(mapping, offset);
2504 unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
2505 }
2506
2507 if (inode->i_op->truncate)
2508 inode->i_op->truncate(inode);
2509 return 0;
2510
2511out_sig:
2512 send_sig(SIGXFSZ, current, 0);
2513out_big:
2514 return -EFBIG;
2515}
2516EXPORT_SYMBOL(vmtruncate);
2517
2518int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end) 2462int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end)
2519{ 2463{
2520 struct address_space *mapping = inode->i_mapping; 2464 struct address_space *mapping = inode->i_mapping;
diff --git a/mm/mremap.c b/mm/mremap.c
index 20a07dba6be0..97bff2547719 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -86,8 +86,8 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
86 if (vma->vm_file) { 86 if (vma->vm_file) {
87 /* 87 /*
88 * Subtle point from Rajesh Venkatasubramanian: before 88 * Subtle point from Rajesh Venkatasubramanian: before
89 * moving file-based ptes, we must lock vmtruncate out, 89 * moving file-based ptes, we must lock truncate_pagecache
90 * since it might clean the dst vma before the src vma, 90 * out, since it might clean the dst vma before the src vma,
91 * and we propagate stale pages into the dst afterward. 91 * and we propagate stale pages into the dst afterward.
92 */ 92 */
93 mapping = vma->vm_file->f_mapping; 93 mapping = vma->vm_file->f_mapping;
diff --git a/mm/nommu.c b/mm/nommu.c
index 8d484241d034..56a446f05971 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -83,46 +83,6 @@ struct vm_operations_struct generic_file_vm_ops = {
83}; 83};
84 84
85/* 85/*
86 * Handle all mappings that got truncated by a "truncate()"
87 * system call.
88 *
89 * NOTE! We have to be ready to update the memory sharing
90 * between the file and the memory map for a potential last
91 * incomplete page. Ugly, but necessary.
92 */
93int vmtruncate(struct inode *inode, loff_t offset)
94{
95 struct address_space *mapping = inode->i_mapping;
96 unsigned long limit;
97
98 if (inode->i_size < offset)
99 goto do_expand;
100 i_size_write(inode, offset);
101
102 truncate_inode_pages(mapping, offset);
103 goto out_truncate;
104
105do_expand:
106 limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
107 if (limit != RLIM_INFINITY && offset > limit)
108 goto out_sig;
109 if (offset > inode->i_sb->s_maxbytes)
110 goto out;
111 i_size_write(inode, offset);
112
113out_truncate:
114 if (inode->i_op->truncate)
115 inode->i_op->truncate(inode);
116 return 0;
117out_sig:
118 send_sig(SIGXFSZ, current, 0);
119out:
120 return -EFBIG;
121}
122
123EXPORT_SYMBOL(vmtruncate);
124
125/*
126 * Return the total memory allocated for this pointer, not 86 * Return the total memory allocated for this pointer, not
127 * just what the caller asked for. 87 * just what the caller asked for.
128 * 88 *
diff --git a/mm/truncate.c b/mm/truncate.c
index ccc3ecf7cb98..5900afca0fa9 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -465,3 +465,67 @@ int invalidate_inode_pages2(struct address_space *mapping)
465 return invalidate_inode_pages2_range(mapping, 0, -1); 465 return invalidate_inode_pages2_range(mapping, 0, -1);
466} 466}
467EXPORT_SYMBOL_GPL(invalidate_inode_pages2); 467EXPORT_SYMBOL_GPL(invalidate_inode_pages2);
468
469/**
470 * truncate_pagecache - unmap and remove pagecache that has been truncated
471 * @inode: inode
472 * @old: old file offset
473 * @new: new file offset
474 *
475 * inode's new i_size must already be written before truncate_pagecache
476 * is called.
477 *
478 * This function should typically be called before the filesystem
479 * releases resources associated with the freed range (eg. deallocates
480 * blocks). This way, pagecache will always stay logically coherent
481 * with on-disk format, and the filesystem would not have to deal with
482 * situations such as writepage being called for a page that has already
483 * had its underlying blocks deallocated.
484 */
485void truncate_pagecache(struct inode *inode, loff_t old, loff_t new)
486{
487 if (new < old) {
488 struct address_space *mapping = inode->i_mapping;
489
490 /*
491 * unmap_mapping_range is called twice, first simply for
492 * efficiency so that truncate_inode_pages does fewer
493 * single-page unmaps. However after this first call, and
494 * before truncate_inode_pages finishes, it is possible for
495 * private pages to be COWed, which remain after
496 * truncate_inode_pages finishes, hence the second
497 * unmap_mapping_range call must be made for correctness.
498 */
499 unmap_mapping_range(mapping, new + PAGE_SIZE - 1, 0, 1);
500 truncate_inode_pages(mapping, new);
501 unmap_mapping_range(mapping, new + PAGE_SIZE - 1, 0, 1);
502 }
503}
504EXPORT_SYMBOL(truncate_pagecache);
505
506/**
507 * vmtruncate - unmap mappings "freed" by truncate() syscall
508 * @inode: inode of the file used
509 * @offset: file offset to start truncating
510 *
511 * NOTE! We have to be ready to update the memory sharing
512 * between the file and the memory map for a potential last
513 * incomplete page. Ugly, but necessary.
514 */
515int vmtruncate(struct inode *inode, loff_t offset)
516{
517 loff_t oldsize;
518 int error;
519
520 error = inode_newsize_ok(inode, offset);
521 if (error)
522 return error;
523 oldsize = inode->i_size;
524 i_size_write(inode, offset);
525 truncate_pagecache(inode, oldsize, offset);
526 if (inode->i_op->truncate)
527 inode->i_op->truncate(inode);
528
529 return error;
530}
531EXPORT_SYMBOL(vmtruncate);