diff options
Diffstat (limited to 'fs/fuse/file.c')
-rw-r--r-- | fs/fuse/file.c | 321 |
1 files changed, 305 insertions, 16 deletions
diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 676b0bc8a86d..68051f3bdf91 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c | |||
@@ -210,6 +210,49 @@ u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id) | |||
210 | return (u64) v0 + ((u64) v1 << 32); | 210 | return (u64) v0 + ((u64) v1 << 32); |
211 | } | 211 | } |
212 | 212 | ||
213 | /* | ||
214 | * Check if page is under writeback | ||
215 | * | ||
216 | * This is currently done by walking the list of writepage requests | ||
217 | * for the inode, which can be pretty inefficient. | ||
218 | */ | ||
219 | static bool fuse_page_is_writeback(struct inode *inode, pgoff_t index) | ||
220 | { | ||
221 | struct fuse_conn *fc = get_fuse_conn(inode); | ||
222 | struct fuse_inode *fi = get_fuse_inode(inode); | ||
223 | struct fuse_req *req; | ||
224 | bool found = false; | ||
225 | |||
226 | spin_lock(&fc->lock); | ||
227 | list_for_each_entry(req, &fi->writepages, writepages_entry) { | ||
228 | pgoff_t curr_index; | ||
229 | |||
230 | BUG_ON(req->inode != inode); | ||
231 | curr_index = req->misc.write.in.offset >> PAGE_CACHE_SHIFT; | ||
232 | if (curr_index == index) { | ||
233 | found = true; | ||
234 | break; | ||
235 | } | ||
236 | } | ||
237 | spin_unlock(&fc->lock); | ||
238 | |||
239 | return found; | ||
240 | } | ||
241 | |||
242 | /* | ||
243 | * Wait for page writeback to be completed. | ||
244 | * | ||
245 | * Since fuse doesn't rely on the VM writeback tracking, this has to | ||
246 | * use some other means. | ||
247 | */ | ||
248 | static int fuse_wait_on_page_writeback(struct inode *inode, pgoff_t index) | ||
249 | { | ||
250 | struct fuse_inode *fi = get_fuse_inode(inode); | ||
251 | |||
252 | wait_event(fi->page_waitq, !fuse_page_is_writeback(inode, index)); | ||
253 | return 0; | ||
254 | } | ||
255 | |||
213 | static int fuse_flush(struct file *file, fl_owner_t id) | 256 | static int fuse_flush(struct file *file, fl_owner_t id) |
214 | { | 257 | { |
215 | struct inode *inode = file->f_path.dentry->d_inode; | 258 | struct inode *inode = file->f_path.dentry->d_inode; |
@@ -245,6 +288,21 @@ static int fuse_flush(struct file *file, fl_owner_t id) | |||
245 | return err; | 288 | return err; |
246 | } | 289 | } |
247 | 290 | ||
291 | /* | ||
292 | * Wait for all pending writepages on the inode to finish. | ||
293 | * | ||
294 | * This is currently done by blocking further writes with FUSE_NOWRITE | ||
295 | * and waiting for all sent writes to complete. | ||
296 | * | ||
297 | * This must be called under i_mutex, otherwise the FUSE_NOWRITE usage | ||
298 | * could conflict with truncation. | ||
299 | */ | ||
300 | static void fuse_sync_writes(struct inode *inode) | ||
301 | { | ||
302 | fuse_set_nowrite(inode); | ||
303 | fuse_release_nowrite(inode); | ||
304 | } | ||
305 | |||
248 | int fuse_fsync_common(struct file *file, struct dentry *de, int datasync, | 306 | int fuse_fsync_common(struct file *file, struct dentry *de, int datasync, |
249 | int isdir) | 307 | int isdir) |
250 | { | 308 | { |
@@ -261,6 +319,17 @@ int fuse_fsync_common(struct file *file, struct dentry *de, int datasync, | |||
261 | if ((!isdir && fc->no_fsync) || (isdir && fc->no_fsyncdir)) | 319 | if ((!isdir && fc->no_fsync) || (isdir && fc->no_fsyncdir)) |
262 | return 0; | 320 | return 0; |
263 | 321 | ||
322 | /* | ||
323 | * Start writeback against all dirty pages of the inode, then | ||
324 | * wait for all outstanding writes, before sending the FSYNC | ||
325 | * request. | ||
326 | */ | ||
327 | err = write_inode_now(inode, 0); | ||
328 | if (err) | ||
329 | return err; | ||
330 | |||
331 | fuse_sync_writes(inode); | ||
332 | |||
264 | req = fuse_get_req(fc); | 333 | req = fuse_get_req(fc); |
265 | if (IS_ERR(req)) | 334 | if (IS_ERR(req)) |
266 | return PTR_ERR(req); | 335 | return PTR_ERR(req); |
@@ -340,6 +409,13 @@ static int fuse_readpage(struct file *file, struct page *page) | |||
340 | if (is_bad_inode(inode)) | 409 | if (is_bad_inode(inode)) |
341 | goto out; | 410 | goto out; |
342 | 411 | ||
412 | /* | ||
413 | * Page writeback can extend beyond the lifetime of the | ||
414 | * page-cache page, so make sure we read a properly synced | ||
415 | * page. | ||
416 | */ | ||
417 | fuse_wait_on_page_writeback(inode, page->index); | ||
418 | |||
343 | req = fuse_get_req(fc); | 419 | req = fuse_get_req(fc); |
344 | err = PTR_ERR(req); | 420 | err = PTR_ERR(req); |
345 | if (IS_ERR(req)) | 421 | if (IS_ERR(req)) |
@@ -411,6 +487,8 @@ static int fuse_readpages_fill(void *_data, struct page *page) | |||
411 | struct inode *inode = data->inode; | 487 | struct inode *inode = data->inode; |
412 | struct fuse_conn *fc = get_fuse_conn(inode); | 488 | struct fuse_conn *fc = get_fuse_conn(inode); |
413 | 489 | ||
490 | fuse_wait_on_page_writeback(inode, page->index); | ||
491 | |||
414 | if (req->num_pages && | 492 | if (req->num_pages && |
415 | (req->num_pages == FUSE_MAX_PAGES_PER_REQ || | 493 | (req->num_pages == FUSE_MAX_PAGES_PER_REQ || |
416 | (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_read || | 494 | (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_read || |
@@ -477,11 +555,10 @@ static ssize_t fuse_file_aio_read(struct kiocb *iocb, const struct iovec *iov, | |||
477 | } | 555 | } |
478 | 556 | ||
479 | static void fuse_write_fill(struct fuse_req *req, struct file *file, | 557 | static void fuse_write_fill(struct fuse_req *req, struct file *file, |
480 | struct inode *inode, loff_t pos, size_t count, | 558 | struct fuse_file *ff, struct inode *inode, |
481 | int writepage) | 559 | loff_t pos, size_t count, int writepage) |
482 | { | 560 | { |
483 | struct fuse_conn *fc = get_fuse_conn(inode); | 561 | struct fuse_conn *fc = get_fuse_conn(inode); |
484 | struct fuse_file *ff = file->private_data; | ||
485 | struct fuse_write_in *inarg = &req->misc.write.in; | 562 | struct fuse_write_in *inarg = &req->misc.write.in; |
486 | struct fuse_write_out *outarg = &req->misc.write.out; | 563 | struct fuse_write_out *outarg = &req->misc.write.out; |
487 | 564 | ||
@@ -490,7 +567,7 @@ static void fuse_write_fill(struct fuse_req *req, struct file *file, | |||
490 | inarg->offset = pos; | 567 | inarg->offset = pos; |
491 | inarg->size = count; | 568 | inarg->size = count; |
492 | inarg->write_flags = writepage ? FUSE_WRITE_CACHE : 0; | 569 | inarg->write_flags = writepage ? FUSE_WRITE_CACHE : 0; |
493 | inarg->flags = file->f_flags; | 570 | inarg->flags = file ? file->f_flags : 0; |
494 | req->in.h.opcode = FUSE_WRITE; | 571 | req->in.h.opcode = FUSE_WRITE; |
495 | req->in.h.nodeid = get_node_id(inode); | 572 | req->in.h.nodeid = get_node_id(inode); |
496 | req->in.argpages = 1; | 573 | req->in.argpages = 1; |
@@ -511,7 +588,7 @@ static size_t fuse_send_write(struct fuse_req *req, struct file *file, | |||
511 | fl_owner_t owner) | 588 | fl_owner_t owner) |
512 | { | 589 | { |
513 | struct fuse_conn *fc = get_fuse_conn(inode); | 590 | struct fuse_conn *fc = get_fuse_conn(inode); |
514 | fuse_write_fill(req, file, inode, pos, count, 0); | 591 | fuse_write_fill(req, file, file->private_data, inode, pos, count, 0); |
515 | if (owner != NULL) { | 592 | if (owner != NULL) { |
516 | struct fuse_write_in *inarg = &req->misc.write.in; | 593 | struct fuse_write_in *inarg = &req->misc.write.in; |
517 | inarg->write_flags |= FUSE_WRITE_LOCKOWNER; | 594 | inarg->write_flags |= FUSE_WRITE_LOCKOWNER; |
@@ -546,6 +623,12 @@ static int fuse_buffered_write(struct file *file, struct inode *inode, | |||
546 | if (is_bad_inode(inode)) | 623 | if (is_bad_inode(inode)) |
547 | return -EIO; | 624 | return -EIO; |
548 | 625 | ||
626 | /* | ||
627 | * Make sure writepages on the same page are not mixed up with | ||
628 | * plain writes. | ||
629 | */ | ||
630 | fuse_wait_on_page_writeback(inode, page->index); | ||
631 | |||
549 | req = fuse_get_req(fc); | 632 | req = fuse_get_req(fc); |
550 | if (IS_ERR(req)) | 633 | if (IS_ERR(req)) |
551 | return PTR_ERR(req); | 634 | return PTR_ERR(req); |
@@ -716,21 +799,225 @@ static ssize_t fuse_direct_write(struct file *file, const char __user *buf, | |||
716 | return res; | 799 | return res; |
717 | } | 800 | } |
718 | 801 | ||
719 | static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma) | 802 | static void fuse_writepage_free(struct fuse_conn *fc, struct fuse_req *req) |
720 | { | 803 | { |
721 | if ((vma->vm_flags & VM_SHARED)) { | 804 | __free_page(req->pages[0]); |
722 | if ((vma->vm_flags & VM_WRITE)) | 805 | fuse_file_put(req->ff); |
723 | return -ENODEV; | 806 | fuse_put_request(fc, req); |
724 | else | 807 | } |
725 | vma->vm_flags &= ~VM_MAYWRITE; | 808 | |
809 | static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req) | ||
810 | { | ||
811 | struct inode *inode = req->inode; | ||
812 | struct fuse_inode *fi = get_fuse_inode(inode); | ||
813 | struct backing_dev_info *bdi = inode->i_mapping->backing_dev_info; | ||
814 | |||
815 | list_del(&req->writepages_entry); | ||
816 | dec_bdi_stat(bdi, BDI_WRITEBACK); | ||
817 | dec_zone_page_state(req->pages[0], NR_WRITEBACK_TEMP); | ||
818 | bdi_writeout_inc(bdi); | ||
819 | wake_up(&fi->page_waitq); | ||
820 | } | ||
821 | |||
822 | /* Called under fc->lock, may release and reacquire it */ | ||
823 | static void fuse_send_writepage(struct fuse_conn *fc, struct fuse_req *req) | ||
824 | { | ||
825 | struct fuse_inode *fi = get_fuse_inode(req->inode); | ||
826 | loff_t size = i_size_read(req->inode); | ||
827 | struct fuse_write_in *inarg = &req->misc.write.in; | ||
828 | |||
829 | if (!fc->connected) | ||
830 | goto out_free; | ||
831 | |||
832 | if (inarg->offset + PAGE_CACHE_SIZE <= size) { | ||
833 | inarg->size = PAGE_CACHE_SIZE; | ||
834 | } else if (inarg->offset < size) { | ||
835 | inarg->size = size & (PAGE_CACHE_SIZE - 1); | ||
836 | } else { | ||
837 | /* Got truncated off completely */ | ||
838 | goto out_free; | ||
726 | } | 839 | } |
727 | return generic_file_mmap(file, vma); | 840 | |
841 | req->in.args[1].size = inarg->size; | ||
842 | fi->writectr++; | ||
843 | request_send_background_locked(fc, req); | ||
844 | return; | ||
845 | |||
846 | out_free: | ||
847 | fuse_writepage_finish(fc, req); | ||
848 | spin_unlock(&fc->lock); | ||
849 | fuse_writepage_free(fc, req); | ||
850 | spin_lock(&fc->lock); | ||
728 | } | 851 | } |
729 | 852 | ||
730 | static int fuse_set_page_dirty(struct page *page) | 853 | /* |
854 | * If fi->writectr is non-negative (no truncate or fsync going on) send | ||
855 | * all queued writepage requests. | ||
856 | * | ||
857 | * Called with fc->lock held | ||
858 | */ | ||
859 | void fuse_flush_writepages(struct inode *inode) | ||
731 | { | 860 | { |
732 | printk("fuse_set_page_dirty: should not happen\n"); | 861 | struct fuse_conn *fc = get_fuse_conn(inode); |
733 | dump_stack(); | 862 | struct fuse_inode *fi = get_fuse_inode(inode); |
863 | struct fuse_req *req; | ||
864 | |||
865 | while (fi->writectr >= 0 && !list_empty(&fi->queued_writes)) { | ||
866 | req = list_entry(fi->queued_writes.next, struct fuse_req, list); | ||
867 | list_del_init(&req->list); | ||
868 | fuse_send_writepage(fc, req); | ||
869 | } | ||
870 | } | ||
871 | |||
872 | static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_req *req) | ||
873 | { | ||
874 | struct inode *inode = req->inode; | ||
875 | struct fuse_inode *fi = get_fuse_inode(inode); | ||
876 | |||
877 | mapping_set_error(inode->i_mapping, req->out.h.error); | ||
878 | spin_lock(&fc->lock); | ||
879 | fi->writectr--; | ||
880 | fuse_writepage_finish(fc, req); | ||
881 | spin_unlock(&fc->lock); | ||
882 | fuse_writepage_free(fc, req); | ||
883 | } | ||
884 | |||
885 | static int fuse_writepage_locked(struct page *page) | ||
886 | { | ||
887 | struct address_space *mapping = page->mapping; | ||
888 | struct inode *inode = mapping->host; | ||
889 | struct fuse_conn *fc = get_fuse_conn(inode); | ||
890 | struct fuse_inode *fi = get_fuse_inode(inode); | ||
891 | struct fuse_req *req; | ||
892 | struct fuse_file *ff; | ||
893 | struct page *tmp_page; | ||
894 | |||
895 | set_page_writeback(page); | ||
896 | |||
897 | req = fuse_request_alloc_nofs(); | ||
898 | if (!req) | ||
899 | goto err; | ||
900 | |||
901 | tmp_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); | ||
902 | if (!tmp_page) | ||
903 | goto err_free; | ||
904 | |||
905 | spin_lock(&fc->lock); | ||
906 | BUG_ON(list_empty(&fi->write_files)); | ||
907 | ff = list_entry(fi->write_files.next, struct fuse_file, write_entry); | ||
908 | req->ff = fuse_file_get(ff); | ||
909 | spin_unlock(&fc->lock); | ||
910 | |||
911 | fuse_write_fill(req, NULL, ff, inode, page_offset(page), 0, 1); | ||
912 | |||
913 | copy_highpage(tmp_page, page); | ||
914 | req->num_pages = 1; | ||
915 | req->pages[0] = tmp_page; | ||
916 | req->page_offset = 0; | ||
917 | req->end = fuse_writepage_end; | ||
918 | req->inode = inode; | ||
919 | |||
920 | inc_bdi_stat(mapping->backing_dev_info, BDI_WRITEBACK); | ||
921 | inc_zone_page_state(tmp_page, NR_WRITEBACK_TEMP); | ||
922 | end_page_writeback(page); | ||
923 | |||
924 | spin_lock(&fc->lock); | ||
925 | list_add(&req->writepages_entry, &fi->writepages); | ||
926 | list_add_tail(&req->list, &fi->queued_writes); | ||
927 | fuse_flush_writepages(inode); | ||
928 | spin_unlock(&fc->lock); | ||
929 | |||
930 | return 0; | ||
931 | |||
932 | err_free: | ||
933 | fuse_request_free(req); | ||
934 | err: | ||
935 | end_page_writeback(page); | ||
936 | return -ENOMEM; | ||
937 | } | ||
938 | |||
939 | static int fuse_writepage(struct page *page, struct writeback_control *wbc) | ||
940 | { | ||
941 | int err; | ||
942 | |||
943 | err = fuse_writepage_locked(page); | ||
944 | unlock_page(page); | ||
945 | |||
946 | return err; | ||
947 | } | ||
948 | |||
949 | static int fuse_launder_page(struct page *page) | ||
950 | { | ||
951 | int err = 0; | ||
952 | if (clear_page_dirty_for_io(page)) { | ||
953 | struct inode *inode = page->mapping->host; | ||
954 | err = fuse_writepage_locked(page); | ||
955 | if (!err) | ||
956 | fuse_wait_on_page_writeback(inode, page->index); | ||
957 | } | ||
958 | return err; | ||
959 | } | ||
960 | |||
961 | /* | ||
962 | * Write back dirty pages now, because there may not be any suitable | ||
963 | * open files later | ||
964 | */ | ||
965 | static void fuse_vma_close(struct vm_area_struct *vma) | ||
966 | { | ||
967 | filemap_write_and_wait(vma->vm_file->f_mapping); | ||
968 | } | ||
969 | |||
970 | /* | ||
971 | * Wait for writeback against this page to complete before allowing it | ||
972 | * to be marked dirty again, and hence written back again, possibly | ||
973 | * before the previous writepage completed. | ||
974 | * | ||
975 | * Block here, instead of in ->writepage(), so that the userspace fs | ||
976 | * can only block processes actually operating on the filesystem. | ||
977 | * | ||
978 | * Otherwise unprivileged userspace fs would be able to block | ||
979 | * unrelated: | ||
980 | * | ||
981 | * - page migration | ||
982 | * - sync(2) | ||
983 | * - try_to_free_pages() with order > PAGE_ALLOC_COSTLY_ORDER | ||
984 | */ | ||
985 | static int fuse_page_mkwrite(struct vm_area_struct *vma, struct page *page) | ||
986 | { | ||
987 | /* | ||
988 | * Don't use page->mapping as it may become NULL from a | ||
989 | * concurrent truncate. | ||
990 | */ | ||
991 | struct inode *inode = vma->vm_file->f_mapping->host; | ||
992 | |||
993 | fuse_wait_on_page_writeback(inode, page->index); | ||
994 | return 0; | ||
995 | } | ||
996 | |||
997 | static struct vm_operations_struct fuse_file_vm_ops = { | ||
998 | .close = fuse_vma_close, | ||
999 | .fault = filemap_fault, | ||
1000 | .page_mkwrite = fuse_page_mkwrite, | ||
1001 | }; | ||
1002 | |||
1003 | static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma) | ||
1004 | { | ||
1005 | if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE)) { | ||
1006 | struct inode *inode = file->f_dentry->d_inode; | ||
1007 | struct fuse_conn *fc = get_fuse_conn(inode); | ||
1008 | struct fuse_inode *fi = get_fuse_inode(inode); | ||
1009 | struct fuse_file *ff = file->private_data; | ||
1010 | /* | ||
1011 | * file may be written through mmap, so chain it onto the | ||
1012 | * inode's write_files list | ||
1013 | */ | ||
1014 | spin_lock(&fc->lock); | ||
1015 | if (list_empty(&ff->write_entry)) | ||
1016 | list_add(&ff->write_entry, &fi->write_files); | ||
1017 | spin_unlock(&fc->lock); | ||
1018 | } | ||
1019 | file_accessed(file); | ||
1020 | vma->vm_ops = &fuse_file_vm_ops; | ||
734 | return 0; | 1021 | return 0; |
735 | } | 1022 | } |
736 | 1023 | ||
@@ -940,10 +1227,12 @@ static const struct file_operations fuse_direct_io_file_operations = { | |||
940 | 1227 | ||
941 | static const struct address_space_operations fuse_file_aops = { | 1228 | static const struct address_space_operations fuse_file_aops = { |
942 | .readpage = fuse_readpage, | 1229 | .readpage = fuse_readpage, |
1230 | .writepage = fuse_writepage, | ||
1231 | .launder_page = fuse_launder_page, | ||
943 | .write_begin = fuse_write_begin, | 1232 | .write_begin = fuse_write_begin, |
944 | .write_end = fuse_write_end, | 1233 | .write_end = fuse_write_end, |
945 | .readpages = fuse_readpages, | 1234 | .readpages = fuse_readpages, |
946 | .set_page_dirty = fuse_set_page_dirty, | 1235 | .set_page_dirty = __set_page_dirty_nobuffers, |
947 | .bmap = fuse_bmap, | 1236 | .bmap = fuse_bmap, |
948 | }; | 1237 | }; |
949 | 1238 | ||