diff options
author | Nick Piggin <npiggin@suse.de> | 2007-10-16 04:25:01 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-10-16 12:42:55 -0400 |
commit | afddba49d18f346e5cc2938b6ed7c512db18ca68 (patch) | |
tree | 4726e3d3b0e9e8e5b5d3b2b0cccb36446bbdf3ca /Documentation/filesystems | |
parent | 637aff46f94a754207c80c8c64bf1b74f24b967d (diff) |
fs: introduce write_begin, write_end, and perform_write aops
These are intended to replace prepare_write and commit_write with more
flexible alternatives that are also able to avoid the buffered write
deadlock problems efficiently (which prepare_write is unable to do).
[mark.fasheh@oracle.com: API design contributions, code review and fixes]
[akpm@linux-foundation.org: various fixes]
[dmonakhov@sw.ru: new aop block_write_begin fix]
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
Signed-off-by: Dmitriy Monakhov <dmonakhov@openvz.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'Documentation/filesystems')
-rw-r--r-- | Documentation/filesystems/Locking | 9 | ||||
-rw-r--r-- | Documentation/filesystems/vfs.txt | 45 |
2 files changed, 51 insertions, 3 deletions
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index f0f825808ca4..fe26cc978523 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking | |||
@@ -178,15 +178,18 @@ prototypes: | |||
178 | locking rules: | 178 | locking rules: |
179 | All except set_page_dirty may block | 179 | All except set_page_dirty may block |
180 | 180 | ||
181 | BKL PageLocked(page) | 181 | BKL PageLocked(page) i_sem |
182 | writepage: no yes, unlocks (see below) | 182 | writepage: no yes, unlocks (see below) |
183 | readpage: no yes, unlocks | 183 | readpage: no yes, unlocks |
184 | sync_page: no maybe | 184 | sync_page: no maybe |
185 | writepages: no | 185 | writepages: no |
186 | set_page_dirty no no | 186 | set_page_dirty no no |
187 | readpages: no | 187 | readpages: no |
188 | prepare_write: no yes | 188 | prepare_write: no yes yes |
189 | commit_write: no yes | 189 | commit_write: no yes yes |
190 | write_begin: no locks the page yes | ||
191 | write_end: no yes, unlocks yes | ||
192 | perform_write: no n/a yes | ||
190 | bmap: yes | 193 | bmap: yes |
191 | invalidatepage: no yes | 194 | invalidatepage: no yes |
192 | releasepage: no yes | 195 | releasepage: no yes |
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index 045f3e055a28..281c19ff7f45 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt | |||
@@ -537,6 +537,12 @@ struct address_space_operations { | |||
537 | struct list_head *pages, unsigned nr_pages); | 537 | struct list_head *pages, unsigned nr_pages); |
538 | int (*prepare_write)(struct file *, struct page *, unsigned, unsigned); | 538 | int (*prepare_write)(struct file *, struct page *, unsigned, unsigned); |
539 | int (*commit_write)(struct file *, struct page *, unsigned, unsigned); | 539 | int (*commit_write)(struct file *, struct page *, unsigned, unsigned); |
540 | int (*write_begin)(struct file *, struct address_space *mapping, | ||
541 | loff_t pos, unsigned len, unsigned flags, | ||
542 | struct page **pagep, void **fsdata); | ||
543 | int (*write_end)(struct file *, struct address_space *mapping, | ||
544 | loff_t pos, unsigned len, unsigned copied, | ||
545 | struct page *page, void *fsdata); | ||
540 | sector_t (*bmap)(struct address_space *, sector_t); | 546 | sector_t (*bmap)(struct address_space *, sector_t); |
541 | int (*invalidatepage) (struct page *, unsigned long); | 547 | int (*invalidatepage) (struct page *, unsigned long); |
542 | int (*releasepage) (struct page *, int); | 548 | int (*releasepage) (struct page *, int); |
@@ -633,6 +639,45 @@ struct address_space_operations { | |||
633 | operations. It should avoid returning an error if possible - | 639 | operations. It should avoid returning an error if possible - |
634 | errors should have been handled by prepare_write. | 640 | errors should have been handled by prepare_write. |
635 | 641 | ||
642 | write_begin: This is intended as a replacement for prepare_write. The | ||
643 | key differences are that: | ||
644 | - it returns a locked page (in *pagep) rather than being | ||
645 | given a pre-locked page; | ||
646 | - it must be able to cope with short writes (where the | ||
647 | length passed to write_begin is greater than the number | ||
648 | of bytes copied into the page). | ||
649 | |||
650 | Called by the generic buffered write code to ask the filesystem to | ||
651 | prepare to write len bytes at the given offset in the file. The | ||
652 | address_space should check that the write will be able to complete, | ||
653 | by allocating space if necessary and doing any other internal | ||
654 | housekeeping. If the write will update parts of any basic-blocks on | ||
655 | storage, then those blocks should be pre-read (if they haven't been | ||
656 | read already) so that the updated blocks can be written out properly. | ||
657 | |||
658 | The filesystem must return the locked pagecache page for the specified | ||
659 | offset, in *pagep, for the caller to write into. | ||
660 | |||
661 | flags is a field for AOP_FLAG_xxx flags, described in | ||
662 | include/linux/fs.h. | ||
663 | |||
664 | A void * may be returned in fsdata, which then gets passed into | ||
665 | write_end. | ||
666 | |||
667 | Returns 0 on success; < 0 on failure (which is the error code), in | ||
668 | which case write_end is not called. | ||
669 | |||
670 | write_end: After a successful write_begin, and data copy, write_end must | ||
671 | be called. len is the original len passed to write_begin, and copied | ||
672 | is the amount that was able to be copied (copied == len is always true | ||
673 | if write_begin was called with the AOP_FLAG_UNINTERRUPTIBLE flag). | ||
674 | |||
675 | The filesystem must take care of unlocking the page and releasing its | ||
676 | refcount, and updating i_size. | ||
677 | |||
678 | Returns < 0 on failure, otherwise the number of bytes (<= 'copied') | ||
679 | that were able to be copied into pagecache. | ||
680 | |||
636 | bmap: called by the VFS to map a logical block offset within object to | 681 | bmap: called by the VFS to map a logical block offset within object to |
637 | physical block number. This method is used by the FIBMAP | 682 | physical block number. This method is used by the FIBMAP |
638 | ioctl and for working with swap-files. To be able to swap to | 683 | ioctl and for working with swap-files. To be able to swap to |