aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMiklos Szeredi <mszeredi@suse.cz>2010-05-25 09:06:07 -0400
committerMiklos Szeredi <mszeredi@suse.cz>2010-05-25 09:06:07 -0400
commitce534fb052928ce556639d7ecf01cbf4e01321e1 (patch)
treec09f7c592a41d635d7f2f54fc7fe10594f332b69
parenta52116aba5b3eed0ee41f70b794cc1937acd5cb8 (diff)
fuse: allow splice to move pages
When splicing buffers to the fuse device with SPLICE_F_MOVE, try to move pages from the pipe buffer into the page cache. This allows populating the fuse filesystem's cache without ever touching the page contents, i.e. zero copy read capability. The following steps are performed when trying to move a page into the page cache: - buf->ops->confirm() to make sure the new page is uptodate - buf->ops->steal() to try to remove the new page from its previous place - remove_from_page_cache() on the old page - add_to_page_cache_locked() on the new page If any of the above steps fail (non-fatally) then the code falls back to copying the page. In particular ->steal() will fail if there are external references (other than the page cache and the pipe buffer) to the page. Also since the remove_from_page_cache() + add_to_page_cache_locked() are non-atomic it is possible that the page cache is repopulated in between the two and add_to_page_cache_locked() will fail. This could be fixed by creating a new atomic replace_page_cache_page() function. fuse_readpages_end() needed to be reworked so it works even if page->mapping is NULL for some or all pages which can happen if the add_to_page_cache_locked() failed. A number of sanity checks were added to make sure the stolen pages don't have weird flags set, etc... These could be moved into generic splice/steal code. Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
-rw-r--r--fs/fuse/dev.c151
-rw-r--r--fs/fuse/file.c28
-rw-r--r--fs/fuse/fuse_i.h3
3 files changed, 167 insertions, 15 deletions
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 2795045484ee..b070d3adf9b0 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -17,6 +17,8 @@
17#include <linux/file.h> 17#include <linux/file.h>
18#include <linux/slab.h> 18#include <linux/slab.h>
19#include <linux/pipe_fs_i.h> 19#include <linux/pipe_fs_i.h>
20#include <linux/swap.h>
21#include <linux/splice.h>
20 22
21MODULE_ALIAS_MISCDEV(FUSE_MINOR); 23MODULE_ALIAS_MISCDEV(FUSE_MINOR);
22 24
@@ -509,6 +511,7 @@ struct fuse_copy_state {
509 void *mapaddr; 511 void *mapaddr;
510 void *buf; 512 void *buf;
511 unsigned len; 513 unsigned len;
514 unsigned move_pages:1;
512}; 515};
513 516
514static void fuse_copy_init(struct fuse_copy_state *cs, struct fuse_conn *fc, 517static void fuse_copy_init(struct fuse_copy_state *cs, struct fuse_conn *fc,
@@ -609,13 +612,135 @@ static int fuse_copy_do(struct fuse_copy_state *cs, void **val, unsigned *size)
609 return ncpy; 612 return ncpy;
610} 613}
611 614
615static int fuse_check_page(struct page *page)
616{
617 if (page_mapcount(page) ||
618 page->mapping != NULL ||
619 page_count(page) != 1 ||
620 (page->flags & PAGE_FLAGS_CHECK_AT_PREP &
621 ~(1 << PG_locked |
622 1 << PG_referenced |
623 1 << PG_uptodate |
624 1 << PG_lru |
625 1 << PG_active |
626 1 << PG_reclaim))) {
627 printk(KERN_WARNING "fuse: trying to steal weird page\n");
628 printk(KERN_WARNING " page=%p index=%li flags=%08lx, count=%i, mapcount=%i, mapping=%p\n", page, page->index, page->flags, page_count(page), page_mapcount(page), page->mapping);
629 return 1;
630 }
631 return 0;
632}
633
634static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
635{
636 int err;
637 struct page *oldpage = *pagep;
638 struct page *newpage;
639 struct pipe_buffer *buf = cs->pipebufs;
640 struct address_space *mapping;
641 pgoff_t index;
642
643 unlock_request(cs->fc, cs->req);
644 fuse_copy_finish(cs);
645
646 err = buf->ops->confirm(cs->pipe, buf);
647 if (err)
648 return err;
649
650 BUG_ON(!cs->nr_segs);
651 cs->currbuf = buf;
652 cs->len = buf->len;
653 cs->pipebufs++;
654 cs->nr_segs--;
655
656 if (cs->len != PAGE_SIZE)
657 goto out_fallback;
658
659 if (buf->ops->steal(cs->pipe, buf) != 0)
660 goto out_fallback;
661
662 newpage = buf->page;
663
664 if (WARN_ON(!PageUptodate(newpage)))
665 return -EIO;
666
667 ClearPageMappedToDisk(newpage);
668
669 if (fuse_check_page(newpage) != 0)
670 goto out_fallback_unlock;
671
672 mapping = oldpage->mapping;
673 index = oldpage->index;
674
675 /*
676 * This is a new and locked page, it shouldn't be mapped or
677 * have any special flags on it
678 */
679 if (WARN_ON(page_mapped(oldpage)))
680 goto out_fallback_unlock;
681 if (WARN_ON(page_has_private(oldpage)))
682 goto out_fallback_unlock;
683 if (WARN_ON(PageDirty(oldpage) || PageWriteback(oldpage)))
684 goto out_fallback_unlock;
685 if (WARN_ON(PageMlocked(oldpage)))
686 goto out_fallback_unlock;
687
688 remove_from_page_cache(oldpage);
689 page_cache_release(oldpage);
690
691 err = add_to_page_cache_locked(newpage, mapping, index, GFP_KERNEL);
692 if (err) {
693 printk(KERN_WARNING "fuse_try_move_page: failed to add page");
694 goto out_fallback_unlock;
695 }
696 page_cache_get(newpage);
697
698 if (!(buf->flags & PIPE_BUF_FLAG_LRU))
699 lru_cache_add_file(newpage);
700
701 err = 0;
702 spin_lock(&cs->fc->lock);
703 if (cs->req->aborted)
704 err = -ENOENT;
705 else
706 *pagep = newpage;
707 spin_unlock(&cs->fc->lock);
708
709 if (err) {
710 unlock_page(newpage);
711 page_cache_release(newpage);
712 return err;
713 }
714
715 unlock_page(oldpage);
716 page_cache_release(oldpage);
717 cs->len = 0;
718
719 return 0;
720
721out_fallback_unlock:
722 unlock_page(newpage);
723out_fallback:
724 cs->mapaddr = buf->ops->map(cs->pipe, buf, 1);
725 cs->buf = cs->mapaddr + buf->offset;
726
727 err = lock_request(cs->fc, cs->req);
728 if (err)
729 return err;
730
731 return 1;
732}
733
612/* 734/*
613 * Copy a page in the request to/from the userspace buffer. Must be 735 * Copy a page in the request to/from the userspace buffer. Must be
614 * done atomically 736 * done atomically
615 */ 737 */
616static int fuse_copy_page(struct fuse_copy_state *cs, struct page *page, 738static int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep,
617 unsigned offset, unsigned count, int zeroing) 739 unsigned offset, unsigned count, int zeroing)
618{ 740{
741 int err;
742 struct page *page = *pagep;
743
619 if (page && zeroing && count < PAGE_SIZE) { 744 if (page && zeroing && count < PAGE_SIZE) {
620 void *mapaddr = kmap_atomic(page, KM_USER1); 745 void *mapaddr = kmap_atomic(page, KM_USER1);
621 memset(mapaddr, 0, PAGE_SIZE); 746 memset(mapaddr, 0, PAGE_SIZE);
@@ -623,9 +748,16 @@ static int fuse_copy_page(struct fuse_copy_state *cs, struct page *page,
623 } 748 }
624 while (count) { 749 while (count) {
625 if (!cs->len) { 750 if (!cs->len) {
626 int err = fuse_copy_fill(cs); 751 if (cs->move_pages && page &&
627 if (err) 752 offset == 0 && count == PAGE_SIZE) {
628 return err; 753 err = fuse_try_move_page(cs, pagep);
754 if (err <= 0)
755 return err;
756 } else {
757 err = fuse_copy_fill(cs);
758 if (err)
759 return err;
760 }
629 } 761 }
630 if (page) { 762 if (page) {
631 void *mapaddr = kmap_atomic(page, KM_USER1); 763 void *mapaddr = kmap_atomic(page, KM_USER1);
@@ -650,8 +782,10 @@ static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes,
650 unsigned count = min(nbytes, (unsigned) PAGE_SIZE - offset); 782 unsigned count = min(nbytes, (unsigned) PAGE_SIZE - offset);
651 783
652 for (i = 0; i < req->num_pages && (nbytes || zeroing); i++) { 784 for (i = 0; i < req->num_pages && (nbytes || zeroing); i++) {
653 struct page *page = req->pages[i]; 785 int err;
654 int err = fuse_copy_page(cs, page, offset, count, zeroing); 786
787 err = fuse_copy_page(cs, &req->pages[i], offset, count,
788 zeroing);
655 if (err) 789 if (err)
656 return err; 790 return err;
657 791
@@ -1079,6 +1213,8 @@ static ssize_t fuse_dev_do_write(struct fuse_conn *fc,
1079 req->out.h = oh; 1213 req->out.h = oh;
1080 req->locked = 1; 1214 req->locked = 1;
1081 cs->req = req; 1215 cs->req = req;
1216 if (!req->out.page_replace)
1217 cs->move_pages = 0;
1082 spin_unlock(&fc->lock); 1218 spin_unlock(&fc->lock);
1083 1219
1084 err = copy_out_args(cs, &req->out, nbytes); 1220 err = copy_out_args(cs, &req->out, nbytes);
@@ -1182,6 +1318,9 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
1182 cs.nr_segs = nbuf; 1318 cs.nr_segs = nbuf;
1183 cs.pipe = pipe; 1319 cs.pipe = pipe;
1184 1320
1321 if (flags & SPLICE_F_MOVE)
1322 cs.move_pages = 1;
1323
1185 ret = fuse_dev_do_write(fc, &cs, len); 1324 ret = fuse_dev_do_write(fc, &cs, len);
1186 1325
1187 for (idx = 0; idx < nbuf; idx++) { 1326 for (idx = 0; idx < nbuf; idx++) {
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 9ca68edcbdbe..06e3775b2282 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -517,17 +517,26 @@ static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_req *req)
517 int i; 517 int i;
518 size_t count = req->misc.read.in.size; 518 size_t count = req->misc.read.in.size;
519 size_t num_read = req->out.args[0].size; 519 size_t num_read = req->out.args[0].size;
520 struct inode *inode = req->pages[0]->mapping->host; 520 struct address_space *mapping = NULL;
521 521
522 /* 522 for (i = 0; mapping == NULL && i < req->num_pages; i++)
523 * Short read means EOF. If file size is larger, truncate it 523 mapping = req->pages[i]->mapping;
524 */
525 if (!req->out.h.error && num_read < count) {
526 loff_t pos = page_offset(req->pages[0]) + num_read;
527 fuse_read_update_size(inode, pos, req->misc.read.attr_ver);
528 }
529 524
530 fuse_invalidate_attr(inode); /* atime changed */ 525 if (mapping) {
526 struct inode *inode = mapping->host;
527
528 /*
529 * Short read means EOF. If file size is larger, truncate it
530 */
531 if (!req->out.h.error && num_read < count) {
532 loff_t pos;
533
534 pos = page_offset(req->pages[0]) + num_read;
535 fuse_read_update_size(inode, pos,
536 req->misc.read.attr_ver);
537 }
538 fuse_invalidate_attr(inode); /* atime changed */
539 }
531 540
532 for (i = 0; i < req->num_pages; i++) { 541 for (i = 0; i < req->num_pages; i++) {
533 struct page *page = req->pages[i]; 542 struct page *page = req->pages[i];
@@ -551,6 +560,7 @@ static void fuse_send_readpages(struct fuse_req *req, struct file *file)
551 560
552 req->out.argpages = 1; 561 req->out.argpages = 1;
553 req->out.page_zeroing = 1; 562 req->out.page_zeroing = 1;
563 req->out.page_replace = 1;
554 fuse_read_fill(req, file, pos, count, FUSE_READ); 564 fuse_read_fill(req, file, pos, count, FUSE_READ);
555 req->misc.read.attr_ver = fuse_get_attr_version(fc); 565 req->misc.read.attr_ver = fuse_get_attr_version(fc);
556 if (fc->async_read) { 566 if (fc->async_read) {
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 01cc462ff45d..9d0a51852d8a 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -177,6 +177,9 @@ struct fuse_out {
177 /** Zero partially or not copied pages */ 177 /** Zero partially or not copied pages */
178 unsigned page_zeroing:1; 178 unsigned page_zeroing:1;
179 179
180 /** Pages may be replaced with new ones */
181 unsigned page_replace:1;
182
180 /** Number or arguments */ 183 /** Number or arguments */
181 unsigned numargs; 184 unsigned numargs;
182 185