author     npiggin@suse.de <npiggin@suse.de>    2009-08-20 12:35:05 -0400
committer  al <al@dizzy.pdmi.ras.ru>            2009-09-24 08:41:47 -0400
commit     25d9e2d15286281ec834b829a4aaf8969011f1cd (patch)
tree       e4329a481ca197afae30f04335e023c7d04f7d67
parent     eca6f534e61919b28fb21aafbd1c2983deae75be (diff)
truncate: new helpers
Introduce new truncate helpers truncate_pagecache and inode_newsize_ok.
vmtruncate is also consolidated from mm/memory.c and mm/nommu.c into
mm/truncate.c.
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
-rw-r--r--   Documentation/vm/locking |  2
-rw-r--r--   fs/attr.c                | 46
-rw-r--r--   include/linux/fs.h       |  3
-rw-r--r--   include/linux/mm.h       |  5
-rw-r--r--   mm/filemap.c             |  2
-rw-r--r--   mm/memory.c              | 62
-rw-r--r--   mm/mremap.c              |  4
-rw-r--r--   mm/nommu.c               | 40
-rw-r--r--   mm/truncate.c            | 64
9 files changed, 120 insertions, 108 deletions
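
The helpers are exported so that filesystems can call them directly from their own setattr/truncate paths instead of going through vmtruncate. A minimal, hypothetical consumer is sketched below; the myfs_* names and myfs_free_blocks() are made up for illustration and are not part of this patch, but the ordering (validate the new size, publish i_size, drop pagecache, then free blocks) follows the documentation added in fs/attr.c and mm/truncate.c.

#include <linux/fs.h>
#include <linux/mm.h>

/* Stand-in for filesystem-specific block freeing; imaginary, illustration only. */
static void myfs_free_blocks(struct inode *inode, loff_t newsize, loff_t oldsize)
{
	/* a real filesystem would release on-disk blocks in [newsize, oldsize) here */
}

/*
 * Hypothetical size-change path using the new helpers.
 * Caller is assumed to hold inode->i_mutex, as inode_newsize_ok requires.
 */
static int myfs_setsize(struct inode *inode, loff_t newsize)
{
	loff_t oldsize;
	int error;

	/* rlimit, s_maxbytes and swapfile checks; sends SIGXFSZ itself if needed */
	error = inode_newsize_ok(inode, newsize);
	if (error)
		return error;

	oldsize = inode->i_size;
	i_size_write(inode, newsize);			/* publish the new size first */
	truncate_pagecache(inode, oldsize, newsize);	/* then drop stale pagecache */

	if (newsize < oldsize)
		myfs_free_blocks(inode, newsize, oldsize);	/* free blocks last */
	return 0;
}
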
diff --git a/Documentation/vm/locking b/Documentation/vm/locking
index f366fa956179..25fadb448760 100644
--- a/Documentation/vm/locking
+++ b/Documentation/vm/locking
@@ -80,7 +80,7 @@ Note: PTL can also be used to guarantee that no new clones using the
 mm start up ... this is a loose form of stability on mm_users. For
 example, it is used in copy_mm to protect against a racing tlb_gather_mmu
 single address space optimization, so that the zap_page_range (from
-vmtruncate) does not lose sending ipi's to cloned threads that might
+truncate) does not lose sending ipi's to cloned threads that might
 be spawned underneath it and go to user mode to drag in pte's into tlbs.
 
 swap_lock
diff --git a/fs/attr.c b/fs/attr.c
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -18,7 +18,7 @@
 /* Taken over from the old code... */
 
 /* POSIX UID/GID verification for setting inode attributes. */
-int inode_change_ok(struct inode *inode, struct iattr *attr)
+int inode_change_ok(const struct inode *inode, struct iattr *attr)
 {
 	int retval = -EPERM;
 	unsigned int ia_valid = attr->ia_valid;
@@ -60,9 +60,51 @@ fine:
 error:
 	return retval;
 }
-
 EXPORT_SYMBOL(inode_change_ok);
 
+/**
+ * inode_newsize_ok - may this inode be truncated to a given size
+ * @inode: the inode to be truncated
+ * @offset: the new size to assign to the inode
+ * @Returns: 0 on success, -ve errno on failure
+ *
+ * inode_newsize_ok will check filesystem limits and ulimits to check that the
+ * new inode size is within limits. inode_newsize_ok will also send SIGXFSZ
+ * when necessary. Caller must not proceed with inode size change if failure is
+ * returned. @inode must be a file (not directory), with appropriate
+ * permissions to allow truncate (inode_newsize_ok does NOT check these
+ * conditions).
+ *
+ * inode_newsize_ok must be called with i_mutex held.
+ */
+int inode_newsize_ok(const struct inode *inode, loff_t offset)
+{
+	if (inode->i_size < offset) {
+		unsigned long limit;
+
+		limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
+		if (limit != RLIM_INFINITY && offset > limit)
+			goto out_sig;
+		if (offset > inode->i_sb->s_maxbytes)
+			goto out_big;
+	} else {
+		/*
+		 * truncation of in-use swapfiles is disallowed - it would
+		 * cause subsequent swapout to scribble on the now-freed
+		 * blocks.
+		 */
+		if (IS_SWAPFILE(inode))
+			return -ETXTBSY;
+	}
+
+	return 0;
+out_sig:
+	send_sig(SIGXFSZ, current, 0);
+out_big:
+	return -EFBIG;
+}
+EXPORT_SYMBOL(inode_newsize_ok);
+
 int inode_setattr(struct inode * inode, struct iattr * attr)
 {
 	unsigned int ia_valid = attr->ia_valid;
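
The rlimit check that inode_newsize_ok() centralizes is observable from userspace: extending a file past RLIMIT_FSIZE with ftruncate() raises SIGXFSZ and, with that signal ignored, the call fails with EFBIG, which is exactly the send_sig()/out_big path above. The standalone program below is not part of the patch; it is a small demonstration of that behaviour, and the temp-file name template is arbitrary.

#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/resource.h>
#include <unistd.h>

int main(void)
{
	struct rlimit rl = { .rlim_cur = 4096, .rlim_max = 4096 };
	char path[] = "/tmp/newsize-demo-XXXXXX";
	int fd;

	signal(SIGXFSZ, SIG_IGN);	/* default action would terminate the process */

	if (setrlimit(RLIMIT_FSIZE, &rl) != 0) {
		perror("setrlimit");
		return 1;
	}

	fd = mkstemp(path);
	if (fd < 0) {
		perror("mkstemp");
		return 1;
	}
	unlink(path);

	if (ftruncate(fd, 4096) != 0)		/* equal to the limit: allowed */
		perror("ftruncate (within limit)");

	if (ftruncate(fd, 1024 * 1024) != 0)	/* beyond the limit: EFBIG expected */
		printf("extending past RLIMIT_FSIZE: %s\n", strerror(errno));

	close(fd);
	return 0;
}
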
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 502d96ef345d..2b08b5ce09b6 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2382,7 +2382,8 @@ extern int buffer_migrate_page(struct address_space *,
 #define buffer_migrate_page NULL
 #endif
 
-extern int inode_change_ok(struct inode *, struct iattr *);
+extern int inode_change_ok(const struct inode *, struct iattr *);
+extern int inode_newsize_ok(const struct inode *, loff_t offset);
 extern int __must_check inode_setattr(struct inode *, struct iattr *);
 
 extern void file_update_time(struct file *file);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index b6eae5e3144b..8347e938fb2f 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -791,8 +791,9 @@ static inline void unmap_shared_mapping_range(struct address_space *mapping,
 	unmap_mapping_range(mapping, holebegin, holelen, 0);
 }
 
-extern int vmtruncate(struct inode * inode, loff_t offset);
-extern int vmtruncate_range(struct inode * inode, loff_t offset, loff_t end);
+extern void truncate_pagecache(struct inode *inode, loff_t old, loff_t new);
+extern int vmtruncate(struct inode *inode, loff_t offset);
+extern int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end);
 
 #ifdef CONFIG_MMU
 extern int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
diff --git a/mm/filemap.c b/mm/filemap.c
index bcc7372aebbc..33349adb227a 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -58,7 +58,7 @@
 /*
  * Lock ordering:
  *
- *  ->i_mmap_lock		(vmtruncate)
+ *  ->i_mmap_lock		(truncate_pagecache)
  *    ->private_lock		(__free_pte->__set_page_dirty_buffers)
  *      ->swap_lock		(exclusive_swap_page, others)
  *        ->mapping->tree_lock
diff --git a/mm/memory.c b/mm/memory.c
index b1443ac07c00..ebcd3decac89 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -297,7 +297,8 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma,
 		unsigned long addr = vma->vm_start;
 
 		/*
-		 * Hide vma from rmap and vmtruncate before freeing pgtables
+		 * Hide vma from rmap and truncate_pagecache before freeing
+		 * pgtables
 		 */
 		anon_vma_unlink(vma);
 		unlink_file_vma(vma);
@@ -2407,7 +2408,7 @@ restart:
  * @mapping: the address space containing mmaps to be unmapped.
  * @holebegin: byte in first page to unmap, relative to the start of
  * the underlying file.  This will be rounded down to a PAGE_SIZE
- * boundary.  Note that this is different from vmtruncate(), which
+ * boundary.  Note that this is different from truncate_pagecache(), which
  * must keep the partial page.  In contrast, we must get rid of
  * partial pages.
  * @holelen: size of prospective hole in bytes.  This will be rounded
@@ -2458,63 +2459,6 @@ void unmap_mapping_range(struct address_space *mapping,
 }
 EXPORT_SYMBOL(unmap_mapping_range);
 
-/**
- * vmtruncate - unmap mappings "freed" by truncate() syscall
- * @inode: inode of the file used
- * @offset: file offset to start truncating
- *
- * NOTE! We have to be ready to update the memory sharing
- * between the file and the memory map for a potential last
- * incomplete page.  Ugly, but necessary.
- */
-int vmtruncate(struct inode * inode, loff_t offset)
-{
-	if (inode->i_size < offset) {
-		unsigned long limit;
-
-		limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
-		if (limit != RLIM_INFINITY && offset > limit)
-			goto out_sig;
-		if (offset > inode->i_sb->s_maxbytes)
-			goto out_big;
-		i_size_write(inode, offset);
-	} else {
-		struct address_space *mapping = inode->i_mapping;
-
-		/*
-		 * truncation of in-use swapfiles is disallowed - it would
-		 * cause subsequent swapout to scribble on the now-freed
-		 * blocks.
-		 */
-		if (IS_SWAPFILE(inode))
-			return -ETXTBSY;
-		i_size_write(inode, offset);
-
-		/*
-		 * unmap_mapping_range is called twice, first simply for
-		 * efficiency so that truncate_inode_pages does fewer
-		 * single-page unmaps.  However after this first call, and
-		 * before truncate_inode_pages finishes, it is possible for
-		 * private pages to be COWed, which remain after
-		 * truncate_inode_pages finishes, hence the second
-		 * unmap_mapping_range call must be made for correctness.
-		 */
-		unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
-		truncate_inode_pages(mapping, offset);
-		unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
-	}
-
-	if (inode->i_op->truncate)
-		inode->i_op->truncate(inode);
-	return 0;
-
-out_sig:
-	send_sig(SIGXFSZ, current, 0);
-out_big:
-	return -EFBIG;
-}
-EXPORT_SYMBOL(vmtruncate);
-
 int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end)
 {
 	struct address_space *mapping = inode->i_mapping;
diff --git a/mm/mremap.c b/mm/mremap.c
index 20a07dba6be0..97bff2547719 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -86,8 +86,8 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
 	if (vma->vm_file) {
 		/*
 		 * Subtle point from Rajesh Venkatasubramanian: before
-		 * moving file-based ptes, we must lock vmtruncate out,
-		 * since it might clean the dst vma before the src vma,
+		 * moving file-based ptes, we must lock truncate_pagecache
+		 * out, since it might clean the dst vma before the src vma,
 		 * and we propagate stale pages into the dst afterward.
 		 */
 		mapping = vma->vm_file->f_mapping;
diff --git a/mm/nommu.c b/mm/nommu.c
index 8d484241d034..56a446f05971 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -83,46 +83,6 @@ struct vm_operations_struct generic_file_vm_ops = {
 };
 
 /*
- * Handle all mappings that got truncated by a "truncate()"
- * system call.
- *
- * NOTE! We have to be ready to update the memory sharing
- * between the file and the memory map for a potential last
- * incomplete page. Ugly, but necessary.
- */
-int vmtruncate(struct inode *inode, loff_t offset)
-{
-	struct address_space *mapping = inode->i_mapping;
-	unsigned long limit;
-
-	if (inode->i_size < offset)
-		goto do_expand;
-	i_size_write(inode, offset);
-
-	truncate_inode_pages(mapping, offset);
-	goto out_truncate;
-
-do_expand:
-	limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
-	if (limit != RLIM_INFINITY && offset > limit)
-		goto out_sig;
-	if (offset > inode->i_sb->s_maxbytes)
-		goto out;
-	i_size_write(inode, offset);
-
-out_truncate:
-	if (inode->i_op->truncate)
-		inode->i_op->truncate(inode);
-	return 0;
-out_sig:
-	send_sig(SIGXFSZ, current, 0);
-out:
-	return -EFBIG;
-}
-
-EXPORT_SYMBOL(vmtruncate);
-
-/*
  * Return the total memory allocated for this pointer, not
  * just what the caller asked for.
  *
diff --git a/mm/truncate.c b/mm/truncate.c
index ccc3ecf7cb98..5900afca0fa9 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -465,3 +465,67 @@ int invalidate_inode_pages2(struct address_space *mapping)
 	return invalidate_inode_pages2_range(mapping, 0, -1);
 }
 EXPORT_SYMBOL_GPL(invalidate_inode_pages2);
+
+/**
+ * truncate_pagecache - unmap and remove pagecache that has been truncated
+ * @inode: inode
+ * @old: old file offset
+ * @new: new file offset
+ *
+ * inode's new i_size must already be written before truncate_pagecache
+ * is called.
+ *
+ * This function should typically be called before the filesystem
+ * releases resources associated with the freed range (eg. deallocates
+ * blocks). This way, pagecache will always stay logically coherent
+ * with on-disk format, and the filesystem would not have to deal with
+ * situations such as writepage being called for a page that has already
+ * had its underlying blocks deallocated.
+ */
+void truncate_pagecache(struct inode *inode, loff_t old, loff_t new)
+{
+	if (new < old) {
+		struct address_space *mapping = inode->i_mapping;
+
+		/*
+		 * unmap_mapping_range is called twice, first simply for
+		 * efficiency so that truncate_inode_pages does fewer
+		 * single-page unmaps.  However after this first call, and
+		 * before truncate_inode_pages finishes, it is possible for
+		 * private pages to be COWed, which remain after
+		 * truncate_inode_pages finishes, hence the second
+		 * unmap_mapping_range call must be made for correctness.
+		 */
+		unmap_mapping_range(mapping, new + PAGE_SIZE - 1, 0, 1);
+		truncate_inode_pages(mapping, new);
+		unmap_mapping_range(mapping, new + PAGE_SIZE - 1, 0, 1);
+	}
+}
+EXPORT_SYMBOL(truncate_pagecache);
+
+/**
+ * vmtruncate - unmap mappings "freed" by truncate() syscall
+ * @inode: inode of the file used
+ * @offset: file offset to start truncating
+ *
+ * NOTE! We have to be ready to update the memory sharing
+ * between the file and the memory map for a potential last
+ * incomplete page.  Ugly, but necessary.
+ */
+int vmtruncate(struct inode *inode, loff_t offset)
+{
+	loff_t oldsize;
+	int error;
+
+	error = inode_newsize_ok(inode, offset);
+	if (error)
+		return error;
+	oldsize = inode->i_size;
+	i_size_write(inode, offset);
+	truncate_pagecache(inode, oldsize, offset);
+	if (inode->i_op->truncate)
+		inode->i_op->truncate(inode);
+
+	return error;
+}
+EXPORT_SYMBOL(vmtruncate);
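
What truncate_pagecache() does to existing mappings is also visible from userspace: once a mapped file is shrunk, the pages beyond the new end-of-file are unmapped and dropped from pagecache, so touching them delivers SIGBUS. The standalone program below is illustrative only and not part of the patch; it relies on documented mmap()/ftruncate() semantics, and the temp-file name is arbitrary.

#include <setjmp.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <unistd.h>

static sigjmp_buf env;

static void on_sigbus(int sig)
{
	(void)sig;
	siglongjmp(env, 1);
}

int main(void)
{
	long page = sysconf(_SC_PAGESIZE);
	char path[] = "/tmp/truncate-demo-XXXXXX";
	int fd = mkstemp(path);
	char *map;

	if (fd < 0) {
		perror("mkstemp");
		return 1;
	}
	unlink(path);

	if (ftruncate(fd, 2 * page) != 0) {	/* two pages of file data */
		perror("ftruncate");
		return 1;
	}
	map = mmap(NULL, 2 * page, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (map == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	map[page] = 'x';			/* second page is still backed by the file */

	if (ftruncate(fd, page) != 0) {		/* shrink: second page is now past EOF */
		perror("ftruncate");
		return 1;
	}

	signal(SIGBUS, on_sigbus);
	if (sigsetjmp(env, 1) == 0) {
		map[page] = 'y';		/* touches pagecache that truncation removed */
		puts("no fault (unexpected)");
	} else {
		puts("SIGBUS: the truncated range was unmapped as expected");
	}

	munmap(map, 2 * page);
	close(fd);
	return 0;
}
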