diff options
Diffstat (limited to 'fs/fuse')
-rw-r--r-- | fs/fuse/dev.c | 19 | ||||
-rw-r--r-- | fs/fuse/dir.c | 84 | ||||
-rw-r--r-- | fs/fuse/file.c | 321 | ||||
-rw-r--r-- | fs/fuse/fuse_i.h | 37 | ||||
-rw-r--r-- | fs/fuse/inode.c | 49 |
5 files changed, 481 insertions, 29 deletions
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index af639807524e..bba83762c484 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c | |||
@@ -47,6 +47,14 @@ struct fuse_req *fuse_request_alloc(void) | |||
47 | return req; | 47 | return req; |
48 | } | 48 | } |
49 | 49 | ||
50 | struct fuse_req *fuse_request_alloc_nofs(void) | ||
51 | { | ||
52 | struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, GFP_NOFS); | ||
53 | if (req) | ||
54 | fuse_request_init(req); | ||
55 | return req; | ||
56 | } | ||
57 | |||
50 | void fuse_request_free(struct fuse_req *req) | 58 | void fuse_request_free(struct fuse_req *req) |
51 | { | 59 | { |
52 | kmem_cache_free(fuse_req_cachep, req); | 60 | kmem_cache_free(fuse_req_cachep, req); |
@@ -430,6 +438,17 @@ void request_send_background(struct fuse_conn *fc, struct fuse_req *req) | |||
430 | } | 438 | } |
431 | 439 | ||
432 | /* | 440 | /* |
441 | * Called under fc->lock | ||
442 | * | ||
443 | * fc->connected must have been checked previously | ||
444 | */ | ||
445 | void request_send_background_locked(struct fuse_conn *fc, struct fuse_req *req) | ||
446 | { | ||
447 | req->isreply = 1; | ||
448 | request_send_nowait_locked(fc, req); | ||
449 | } | ||
450 | |||
451 | /* | ||
433 | * Lock the request. Up to the next unlock_request() there mustn't be | 452 | * Lock the request. Up to the next unlock_request() there mustn't be |
434 | * anything that could cause a page-fault. If the request was already | 453 | * anything that could cause a page-fault. If the request was already |
435 | * aborted bail out. | 454 | * aborted bail out. |
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index c4807b3fc8a3..48b9971ecd97 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c | |||
@@ -1107,6 +1107,50 @@ static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg) | |||
1107 | } | 1107 | } |
1108 | 1108 | ||
1109 | /* | 1109 | /* |
1110 | * Prevent concurrent writepages on inode | ||
1111 | * | ||
1112 | * This is done by adding a negative bias to the inode write counter | ||
1113 | * and waiting for all pending writes to finish. | ||
1114 | */ | ||
1115 | void fuse_set_nowrite(struct inode *inode) | ||
1116 | { | ||
1117 | struct fuse_conn *fc = get_fuse_conn(inode); | ||
1118 | struct fuse_inode *fi = get_fuse_inode(inode); | ||
1119 | |||
1120 | BUG_ON(!mutex_is_locked(&inode->i_mutex)); | ||
1121 | |||
1122 | spin_lock(&fc->lock); | ||
1123 | BUG_ON(fi->writectr < 0); | ||
1124 | fi->writectr += FUSE_NOWRITE; | ||
1125 | spin_unlock(&fc->lock); | ||
1126 | wait_event(fi->page_waitq, fi->writectr == FUSE_NOWRITE); | ||
1127 | } | ||
1128 | |||
1129 | /* | ||
1130 | * Allow writepages on inode | ||
1131 | * | ||
1132 | * Remove the bias from the writecounter and send any queued | ||
1133 | * writepages. | ||
1134 | */ | ||
1135 | static void __fuse_release_nowrite(struct inode *inode) | ||
1136 | { | ||
1137 | struct fuse_inode *fi = get_fuse_inode(inode); | ||
1138 | |||
1139 | BUG_ON(fi->writectr != FUSE_NOWRITE); | ||
1140 | fi->writectr = 0; | ||
1141 | fuse_flush_writepages(inode); | ||
1142 | } | ||
1143 | |||
1144 | void fuse_release_nowrite(struct inode *inode) | ||
1145 | { | ||
1146 | struct fuse_conn *fc = get_fuse_conn(inode); | ||
1147 | |||
1148 | spin_lock(&fc->lock); | ||
1149 | __fuse_release_nowrite(inode); | ||
1150 | spin_unlock(&fc->lock); | ||
1151 | } | ||
1152 | |||
1153 | /* | ||
1110 | * Set attributes, and at the same time refresh them. | 1154 | * Set attributes, and at the same time refresh them. |
1111 | * | 1155 | * |
1112 | * Truncation is slightly complicated, because the 'truncate' request | 1156 | * Truncation is slightly complicated, because the 'truncate' request |
@@ -1122,6 +1166,8 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr, | |||
1122 | struct fuse_req *req; | 1166 | struct fuse_req *req; |
1123 | struct fuse_setattr_in inarg; | 1167 | struct fuse_setattr_in inarg; |
1124 | struct fuse_attr_out outarg; | 1168 | struct fuse_attr_out outarg; |
1169 | bool is_truncate = false; | ||
1170 | loff_t oldsize; | ||
1125 | int err; | 1171 | int err; |
1126 | 1172 | ||
1127 | if (!fuse_allow_task(fc, current)) | 1173 | if (!fuse_allow_task(fc, current)) |
@@ -1145,12 +1191,16 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr, | |||
1145 | send_sig(SIGXFSZ, current, 0); | 1191 | send_sig(SIGXFSZ, current, 0); |
1146 | return -EFBIG; | 1192 | return -EFBIG; |
1147 | } | 1193 | } |
1194 | is_truncate = true; | ||
1148 | } | 1195 | } |
1149 | 1196 | ||
1150 | req = fuse_get_req(fc); | 1197 | req = fuse_get_req(fc); |
1151 | if (IS_ERR(req)) | 1198 | if (IS_ERR(req)) |
1152 | return PTR_ERR(req); | 1199 | return PTR_ERR(req); |
1153 | 1200 | ||
1201 | if (is_truncate) | ||
1202 | fuse_set_nowrite(inode); | ||
1203 | |||
1154 | memset(&inarg, 0, sizeof(inarg)); | 1204 | memset(&inarg, 0, sizeof(inarg)); |
1155 | memset(&outarg, 0, sizeof(outarg)); | 1205 | memset(&outarg, 0, sizeof(outarg)); |
1156 | iattr_to_fattr(attr, &inarg); | 1206 | iattr_to_fattr(attr, &inarg); |
@@ -1181,16 +1231,44 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr, | |||
1181 | if (err) { | 1231 | if (err) { |
1182 | if (err == -EINTR) | 1232 | if (err == -EINTR) |
1183 | fuse_invalidate_attr(inode); | 1233 | fuse_invalidate_attr(inode); |
1184 | return err; | 1234 | goto error; |
1185 | } | 1235 | } |
1186 | 1236 | ||
1187 | if ((inode->i_mode ^ outarg.attr.mode) & S_IFMT) { | 1237 | if ((inode->i_mode ^ outarg.attr.mode) & S_IFMT) { |
1188 | make_bad_inode(inode); | 1238 | make_bad_inode(inode); |
1189 | return -EIO; | 1239 | err = -EIO; |
1240 | goto error; | ||
1241 | } | ||
1242 | |||
1243 | spin_lock(&fc->lock); | ||
1244 | fuse_change_attributes_common(inode, &outarg.attr, | ||
1245 | attr_timeout(&outarg)); | ||
1246 | oldsize = inode->i_size; | ||
1247 | i_size_write(inode, outarg.attr.size); | ||
1248 | |||
1249 | if (is_truncate) { | ||
1250 | /* NOTE: this may release/reacquire fc->lock */ | ||
1251 | __fuse_release_nowrite(inode); | ||
1252 | } | ||
1253 | spin_unlock(&fc->lock); | ||
1254 | |||
1255 | /* | ||
1256 | * Only call invalidate_inode_pages2() after removing | ||
1257 | * FUSE_NOWRITE, otherwise fuse_launder_page() would deadlock. | ||
1258 | */ | ||
1259 | if (S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) { | ||
1260 | if (outarg.attr.size < oldsize) | ||
1261 | fuse_truncate(inode->i_mapping, outarg.attr.size); | ||
1262 | invalidate_inode_pages2(inode->i_mapping); | ||
1190 | } | 1263 | } |
1191 | 1264 | ||
1192 | fuse_change_attributes(inode, &outarg.attr, attr_timeout(&outarg), 0); | ||
1193 | return 0; | 1265 | return 0; |
1266 | |||
1267 | error: | ||
1268 | if (is_truncate) | ||
1269 | fuse_release_nowrite(inode); | ||
1270 | |||
1271 | return err; | ||
1194 | } | 1272 | } |
1195 | 1273 | ||
1196 | static int fuse_setattr(struct dentry *entry, struct iattr *attr) | 1274 | static int fuse_setattr(struct dentry *entry, struct iattr *attr) |
diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 676b0bc8a86d..68051f3bdf91 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c | |||
@@ -210,6 +210,49 @@ u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id) | |||
210 | return (u64) v0 + ((u64) v1 << 32); | 210 | return (u64) v0 + ((u64) v1 << 32); |
211 | } | 211 | } |
212 | 212 | ||
213 | /* | ||
214 | * Check if page is under writeback | ||
215 | * | ||
216 | * This is currently done by walking the list of writepage requests | ||
217 | * for the inode, which can be pretty inefficient. | ||
218 | */ | ||
219 | static bool fuse_page_is_writeback(struct inode *inode, pgoff_t index) | ||
220 | { | ||
221 | struct fuse_conn *fc = get_fuse_conn(inode); | ||
222 | struct fuse_inode *fi = get_fuse_inode(inode); | ||
223 | struct fuse_req *req; | ||
224 | bool found = false; | ||
225 | |||
226 | spin_lock(&fc->lock); | ||
227 | list_for_each_entry(req, &fi->writepages, writepages_entry) { | ||
228 | pgoff_t curr_index; | ||
229 | |||
230 | BUG_ON(req->inode != inode); | ||
231 | curr_index = req->misc.write.in.offset >> PAGE_CACHE_SHIFT; | ||
232 | if (curr_index == index) { | ||
233 | found = true; | ||
234 | break; | ||
235 | } | ||
236 | } | ||
237 | spin_unlock(&fc->lock); | ||
238 | |||
239 | return found; | ||
240 | } | ||
241 | |||
242 | /* | ||
243 | * Wait for page writeback to be completed. | ||
244 | * | ||
245 | * Since fuse doesn't rely on the VM writeback tracking, this has to | ||
246 | * use some other means. | ||
247 | */ | ||
248 | static int fuse_wait_on_page_writeback(struct inode *inode, pgoff_t index) | ||
249 | { | ||
250 | struct fuse_inode *fi = get_fuse_inode(inode); | ||
251 | |||
252 | wait_event(fi->page_waitq, !fuse_page_is_writeback(inode, index)); | ||
253 | return 0; | ||
254 | } | ||
255 | |||
213 | static int fuse_flush(struct file *file, fl_owner_t id) | 256 | static int fuse_flush(struct file *file, fl_owner_t id) |
214 | { | 257 | { |
215 | struct inode *inode = file->f_path.dentry->d_inode; | 258 | struct inode *inode = file->f_path.dentry->d_inode; |
@@ -245,6 +288,21 @@ static int fuse_flush(struct file *file, fl_owner_t id) | |||
245 | return err; | 288 | return err; |
246 | } | 289 | } |
247 | 290 | ||
291 | /* | ||
292 | * Wait for all pending writepages on the inode to finish. | ||
293 | * | ||
294 | * This is currently done by blocking further writes with FUSE_NOWRITE | ||
295 | * and waiting for all sent writes to complete. | ||
296 | * | ||
297 | * This must be called under i_mutex, otherwise the FUSE_NOWRITE usage | ||
298 | * could conflict with truncation. | ||
299 | */ | ||
300 | static void fuse_sync_writes(struct inode *inode) | ||
301 | { | ||
302 | fuse_set_nowrite(inode); | ||
303 | fuse_release_nowrite(inode); | ||
304 | } | ||
305 | |||
248 | int fuse_fsync_common(struct file *file, struct dentry *de, int datasync, | 306 | int fuse_fsync_common(struct file *file, struct dentry *de, int datasync, |
249 | int isdir) | 307 | int isdir) |
250 | { | 308 | { |
@@ -261,6 +319,17 @@ int fuse_fsync_common(struct file *file, struct dentry *de, int datasync, | |||
261 | if ((!isdir && fc->no_fsync) || (isdir && fc->no_fsyncdir)) | 319 | if ((!isdir && fc->no_fsync) || (isdir && fc->no_fsyncdir)) |
262 | return 0; | 320 | return 0; |
263 | 321 | ||
322 | /* | ||
323 | * Start writeback against all dirty pages of the inode, then | ||
324 | * wait for all outstanding writes, before sending the FSYNC | ||
325 | * request. | ||
326 | */ | ||
327 | err = write_inode_now(inode, 0); | ||
328 | if (err) | ||
329 | return err; | ||
330 | |||
331 | fuse_sync_writes(inode); | ||
332 | |||
264 | req = fuse_get_req(fc); | 333 | req = fuse_get_req(fc); |
265 | if (IS_ERR(req)) | 334 | if (IS_ERR(req)) |
266 | return PTR_ERR(req); | 335 | return PTR_ERR(req); |
@@ -340,6 +409,13 @@ static int fuse_readpage(struct file *file, struct page *page) | |||
340 | if (is_bad_inode(inode)) | 409 | if (is_bad_inode(inode)) |
341 | goto out; | 410 | goto out; |
342 | 411 | ||
412 | /* | ||
413 | * Page writeback can extend beyond the lifetime of the | ||
414 | * page-cache page, so make sure we read a properly synced | ||
415 | * page. | ||
416 | */ | ||
417 | fuse_wait_on_page_writeback(inode, page->index); | ||
418 | |||
343 | req = fuse_get_req(fc); | 419 | req = fuse_get_req(fc); |
344 | err = PTR_ERR(req); | 420 | err = PTR_ERR(req); |
345 | if (IS_ERR(req)) | 421 | if (IS_ERR(req)) |
@@ -411,6 +487,8 @@ static int fuse_readpages_fill(void *_data, struct page *page) | |||
411 | struct inode *inode = data->inode; | 487 | struct inode *inode = data->inode; |
412 | struct fuse_conn *fc = get_fuse_conn(inode); | 488 | struct fuse_conn *fc = get_fuse_conn(inode); |
413 | 489 | ||
490 | fuse_wait_on_page_writeback(inode, page->index); | ||
491 | |||
414 | if (req->num_pages && | 492 | if (req->num_pages && |
415 | (req->num_pages == FUSE_MAX_PAGES_PER_REQ || | 493 | (req->num_pages == FUSE_MAX_PAGES_PER_REQ || |
416 | (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_read || | 494 | (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_read || |
@@ -477,11 +555,10 @@ static ssize_t fuse_file_aio_read(struct kiocb *iocb, const struct iovec *iov, | |||
477 | } | 555 | } |
478 | 556 | ||
479 | static void fuse_write_fill(struct fuse_req *req, struct file *file, | 557 | static void fuse_write_fill(struct fuse_req *req, struct file *file, |
480 | struct inode *inode, loff_t pos, size_t count, | 558 | struct fuse_file *ff, struct inode *inode, |
481 | int writepage) | 559 | loff_t pos, size_t count, int writepage) |
482 | { | 560 | { |
483 | struct fuse_conn *fc = get_fuse_conn(inode); | 561 | struct fuse_conn *fc = get_fuse_conn(inode); |
484 | struct fuse_file *ff = file->private_data; | ||
485 | struct fuse_write_in *inarg = &req->misc.write.in; | 562 | struct fuse_write_in *inarg = &req->misc.write.in; |
486 | struct fuse_write_out *outarg = &req->misc.write.out; | 563 | struct fuse_write_out *outarg = &req->misc.write.out; |
487 | 564 | ||
@@ -490,7 +567,7 @@ static void fuse_write_fill(struct fuse_req *req, struct file *file, | |||
490 | inarg->offset = pos; | 567 | inarg->offset = pos; |
491 | inarg->size = count; | 568 | inarg->size = count; |
492 | inarg->write_flags = writepage ? FUSE_WRITE_CACHE : 0; | 569 | inarg->write_flags = writepage ? FUSE_WRITE_CACHE : 0; |
493 | inarg->flags = file->f_flags; | 570 | inarg->flags = file ? file->f_flags : 0; |
494 | req->in.h.opcode = FUSE_WRITE; | 571 | req->in.h.opcode = FUSE_WRITE; |
495 | req->in.h.nodeid = get_node_id(inode); | 572 | req->in.h.nodeid = get_node_id(inode); |
496 | req->in.argpages = 1; | 573 | req->in.argpages = 1; |
@@ -511,7 +588,7 @@ static size_t fuse_send_write(struct fuse_req *req, struct file *file, | |||
511 | fl_owner_t owner) | 588 | fl_owner_t owner) |
512 | { | 589 | { |
513 | struct fuse_conn *fc = get_fuse_conn(inode); | 590 | struct fuse_conn *fc = get_fuse_conn(inode); |
514 | fuse_write_fill(req, file, inode, pos, count, 0); | 591 | fuse_write_fill(req, file, file->private_data, inode, pos, count, 0); |
515 | if (owner != NULL) { | 592 | if (owner != NULL) { |
516 | struct fuse_write_in *inarg = &req->misc.write.in; | 593 | struct fuse_write_in *inarg = &req->misc.write.in; |
517 | inarg->write_flags |= FUSE_WRITE_LOCKOWNER; | 594 | inarg->write_flags |= FUSE_WRITE_LOCKOWNER; |
@@ -546,6 +623,12 @@ static int fuse_buffered_write(struct file *file, struct inode *inode, | |||
546 | if (is_bad_inode(inode)) | 623 | if (is_bad_inode(inode)) |
547 | return -EIO; | 624 | return -EIO; |
548 | 625 | ||
626 | /* | ||
627 | * Make sure writepages on the same page are not mixed up with | ||
628 | * plain writes. | ||
629 | */ | ||
630 | fuse_wait_on_page_writeback(inode, page->index); | ||
631 | |||
549 | req = fuse_get_req(fc); | 632 | req = fuse_get_req(fc); |
550 | if (IS_ERR(req)) | 633 | if (IS_ERR(req)) |
551 | return PTR_ERR(req); | 634 | return PTR_ERR(req); |
@@ -716,21 +799,225 @@ static ssize_t fuse_direct_write(struct file *file, const char __user *buf, | |||
716 | return res; | 799 | return res; |
717 | } | 800 | } |
718 | 801 | ||
719 | static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma) | 802 | static void fuse_writepage_free(struct fuse_conn *fc, struct fuse_req *req) |
720 | { | 803 | { |
721 | if ((vma->vm_flags & VM_SHARED)) { | 804 | __free_page(req->pages[0]); |
722 | if ((vma->vm_flags & VM_WRITE)) | 805 | fuse_file_put(req->ff); |
723 | return -ENODEV; | 806 | fuse_put_request(fc, req); |
724 | else | 807 | } |
725 | vma->vm_flags &= ~VM_MAYWRITE; | 808 | |
809 | static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req) | ||
810 | { | ||
811 | struct inode *inode = req->inode; | ||
812 | struct fuse_inode *fi = get_fuse_inode(inode); | ||
813 | struct backing_dev_info *bdi = inode->i_mapping->backing_dev_info; | ||
814 | |||
815 | list_del(&req->writepages_entry); | ||
816 | dec_bdi_stat(bdi, BDI_WRITEBACK); | ||
817 | dec_zone_page_state(req->pages[0], NR_WRITEBACK_TEMP); | ||
818 | bdi_writeout_inc(bdi); | ||
819 | wake_up(&fi->page_waitq); | ||
820 | } | ||
821 | |||
822 | /* Called under fc->lock, may release and reacquire it */ | ||
823 | static void fuse_send_writepage(struct fuse_conn *fc, struct fuse_req *req) | ||
824 | { | ||
825 | struct fuse_inode *fi = get_fuse_inode(req->inode); | ||
826 | loff_t size = i_size_read(req->inode); | ||
827 | struct fuse_write_in *inarg = &req->misc.write.in; | ||
828 | |||
829 | if (!fc->connected) | ||
830 | goto out_free; | ||
831 | |||
832 | if (inarg->offset + PAGE_CACHE_SIZE <= size) { | ||
833 | inarg->size = PAGE_CACHE_SIZE; | ||
834 | } else if (inarg->offset < size) { | ||
835 | inarg->size = size & (PAGE_CACHE_SIZE - 1); | ||
836 | } else { | ||
837 | /* Got truncated off completely */ | ||
838 | goto out_free; | ||
726 | } | 839 | } |
727 | return generic_file_mmap(file, vma); | 840 | |
841 | req->in.args[1].size = inarg->size; | ||
842 | fi->writectr++; | ||
843 | request_send_background_locked(fc, req); | ||
844 | return; | ||
845 | |||
846 | out_free: | ||
847 | fuse_writepage_finish(fc, req); | ||
848 | spin_unlock(&fc->lock); | ||
849 | fuse_writepage_free(fc, req); | ||
850 | spin_lock(&fc->lock); | ||
728 | } | 851 | } |
729 | 852 | ||
730 | static int fuse_set_page_dirty(struct page *page) | 853 | /* |
854 | * If fi->writectr is non-negative (no truncate or fsync going on) send | ||
855 | * all queued writepage requests. | ||
856 | * | ||
857 | * Called with fc->lock held | ||
858 | */ | ||
859 | void fuse_flush_writepages(struct inode *inode) | ||
731 | { | 860 | { |
732 | printk("fuse_set_page_dirty: should not happen\n"); | 861 | struct fuse_conn *fc = get_fuse_conn(inode); |
733 | dump_stack(); | 862 | struct fuse_inode *fi = get_fuse_inode(inode); |
863 | struct fuse_req *req; | ||
864 | |||
865 | while (fi->writectr >= 0 && !list_empty(&fi->queued_writes)) { | ||
866 | req = list_entry(fi->queued_writes.next, struct fuse_req, list); | ||
867 | list_del_init(&req->list); | ||
868 | fuse_send_writepage(fc, req); | ||
869 | } | ||
870 | } | ||
871 | |||
872 | static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_req *req) | ||
873 | { | ||
874 | struct inode *inode = req->inode; | ||
875 | struct fuse_inode *fi = get_fuse_inode(inode); | ||
876 | |||
877 | mapping_set_error(inode->i_mapping, req->out.h.error); | ||
878 | spin_lock(&fc->lock); | ||
879 | fi->writectr--; | ||
880 | fuse_writepage_finish(fc, req); | ||
881 | spin_unlock(&fc->lock); | ||
882 | fuse_writepage_free(fc, req); | ||
883 | } | ||
884 | |||
885 | static int fuse_writepage_locked(struct page *page) | ||
886 | { | ||
887 | struct address_space *mapping = page->mapping; | ||
888 | struct inode *inode = mapping->host; | ||
889 | struct fuse_conn *fc = get_fuse_conn(inode); | ||
890 | struct fuse_inode *fi = get_fuse_inode(inode); | ||
891 | struct fuse_req *req; | ||
892 | struct fuse_file *ff; | ||
893 | struct page *tmp_page; | ||
894 | |||
895 | set_page_writeback(page); | ||
896 | |||
897 | req = fuse_request_alloc_nofs(); | ||
898 | if (!req) | ||
899 | goto err; | ||
900 | |||
901 | tmp_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); | ||
902 | if (!tmp_page) | ||
903 | goto err_free; | ||
904 | |||
905 | spin_lock(&fc->lock); | ||
906 | BUG_ON(list_empty(&fi->write_files)); | ||
907 | ff = list_entry(fi->write_files.next, struct fuse_file, write_entry); | ||
908 | req->ff = fuse_file_get(ff); | ||
909 | spin_unlock(&fc->lock); | ||
910 | |||
911 | fuse_write_fill(req, NULL, ff, inode, page_offset(page), 0, 1); | ||
912 | |||
913 | copy_highpage(tmp_page, page); | ||
914 | req->num_pages = 1; | ||
915 | req->pages[0] = tmp_page; | ||
916 | req->page_offset = 0; | ||
917 | req->end = fuse_writepage_end; | ||
918 | req->inode = inode; | ||
919 | |||
920 | inc_bdi_stat(mapping->backing_dev_info, BDI_WRITEBACK); | ||
921 | inc_zone_page_state(tmp_page, NR_WRITEBACK_TEMP); | ||
922 | end_page_writeback(page); | ||
923 | |||
924 | spin_lock(&fc->lock); | ||
925 | list_add(&req->writepages_entry, &fi->writepages); | ||
926 | list_add_tail(&req->list, &fi->queued_writes); | ||
927 | fuse_flush_writepages(inode); | ||
928 | spin_unlock(&fc->lock); | ||
929 | |||
930 | return 0; | ||
931 | |||
932 | err_free: | ||
933 | fuse_request_free(req); | ||
934 | err: | ||
935 | end_page_writeback(page); | ||
936 | return -ENOMEM; | ||
937 | } | ||
938 | |||
939 | static int fuse_writepage(struct page *page, struct writeback_control *wbc) | ||
940 | { | ||
941 | int err; | ||
942 | |||
943 | err = fuse_writepage_locked(page); | ||
944 | unlock_page(page); | ||
945 | |||
946 | return err; | ||
947 | } | ||
948 | |||
949 | static int fuse_launder_page(struct page *page) | ||
950 | { | ||
951 | int err = 0; | ||
952 | if (clear_page_dirty_for_io(page)) { | ||
953 | struct inode *inode = page->mapping->host; | ||
954 | err = fuse_writepage_locked(page); | ||
955 | if (!err) | ||
956 | fuse_wait_on_page_writeback(inode, page->index); | ||
957 | } | ||
958 | return err; | ||
959 | } | ||
960 | |||
961 | /* | ||
962 | * Write back dirty pages now, because there may not be any suitable | ||
963 | * open files later | ||
964 | */ | ||
965 | static void fuse_vma_close(struct vm_area_struct *vma) | ||
966 | { | ||
967 | filemap_write_and_wait(vma->vm_file->f_mapping); | ||
968 | } | ||
969 | |||
970 | /* | ||
971 | * Wait for writeback against this page to complete before allowing it | ||
972 | * to be marked dirty again, and hence written back again, possibly | ||
973 | * before the previous writepage completed. | ||
974 | * | ||
975 | * Block here, instead of in ->writepage(), so that the userspace fs | ||
976 | * can only block processes actually operating on the filesystem. | ||
977 | * | ||
978 | * Otherwise unprivileged userspace fs would be able to block | ||
979 | * unrelated: | ||
980 | * | ||
981 | * - page migration | ||
982 | * - sync(2) | ||
983 | * - try_to_free_pages() with order > PAGE_ALLOC_COSTLY_ORDER | ||
984 | */ | ||
985 | static int fuse_page_mkwrite(struct vm_area_struct *vma, struct page *page) | ||
986 | { | ||
987 | /* | ||
988 | * Don't use page->mapping as it may become NULL from a | ||
989 | * concurrent truncate. | ||
990 | */ | ||
991 | struct inode *inode = vma->vm_file->f_mapping->host; | ||
992 | |||
993 | fuse_wait_on_page_writeback(inode, page->index); | ||
994 | return 0; | ||
995 | } | ||
996 | |||
997 | static struct vm_operations_struct fuse_file_vm_ops = { | ||
998 | .close = fuse_vma_close, | ||
999 | .fault = filemap_fault, | ||
1000 | .page_mkwrite = fuse_page_mkwrite, | ||
1001 | }; | ||
1002 | |||
1003 | static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma) | ||
1004 | { | ||
1005 | if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE)) { | ||
1006 | struct inode *inode = file->f_dentry->d_inode; | ||
1007 | struct fuse_conn *fc = get_fuse_conn(inode); | ||
1008 | struct fuse_inode *fi = get_fuse_inode(inode); | ||
1009 | struct fuse_file *ff = file->private_data; | ||
1010 | /* | ||
1011 | * file may be written through mmap, so chain it onto the | ||
1012 | * inode's write_files list | ||
1013 | */ | ||
1014 | spin_lock(&fc->lock); | ||
1015 | if (list_empty(&ff->write_entry)) | ||
1016 | list_add(&ff->write_entry, &fi->write_files); | ||
1017 | spin_unlock(&fc->lock); | ||
1018 | } | ||
1019 | file_accessed(file); | ||
1020 | vma->vm_ops = &fuse_file_vm_ops; | ||
734 | return 0; | 1021 | return 0; |
735 | } | 1022 | } |
736 | 1023 | ||
@@ -940,10 +1227,12 @@ static const struct file_operations fuse_direct_io_file_operations = { | |||
940 | 1227 | ||
941 | static const struct address_space_operations fuse_file_aops = { | 1228 | static const struct address_space_operations fuse_file_aops = { |
942 | .readpage = fuse_readpage, | 1229 | .readpage = fuse_readpage, |
1230 | .writepage = fuse_writepage, | ||
1231 | .launder_page = fuse_launder_page, | ||
943 | .write_begin = fuse_write_begin, | 1232 | .write_begin = fuse_write_begin, |
944 | .write_end = fuse_write_end, | 1233 | .write_end = fuse_write_end, |
945 | .readpages = fuse_readpages, | 1234 | .readpages = fuse_readpages, |
946 | .set_page_dirty = fuse_set_page_dirty, | 1235 | .set_page_dirty = __set_page_dirty_nobuffers, |
947 | .bmap = fuse_bmap, | 1236 | .bmap = fuse_bmap, |
948 | }; | 1237 | }; |
949 | 1238 | ||
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index c0481e48d161..4b094fbc9c7f 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h | |||
@@ -15,6 +15,7 @@ | |||
15 | #include <linux/mm.h> | 15 | #include <linux/mm.h> |
16 | #include <linux/backing-dev.h> | 16 | #include <linux/backing-dev.h> |
17 | #include <linux/mutex.h> | 17 | #include <linux/mutex.h> |
18 | #include <linux/rwsem.h> | ||
18 | 19 | ||
19 | /** Max number of pages that can be used in a single read request */ | 20 | /** Max number of pages that can be used in a single read request */ |
20 | #define FUSE_MAX_PAGES_PER_REQ 32 | 21 | #define FUSE_MAX_PAGES_PER_REQ 32 |
@@ -25,6 +26,9 @@ | |||
25 | /** Congestion starts at 75% of maximum */ | 26 | /** Congestion starts at 75% of maximum */ |
26 | #define FUSE_CONGESTION_THRESHOLD (FUSE_MAX_BACKGROUND * 75 / 100) | 27 | #define FUSE_CONGESTION_THRESHOLD (FUSE_MAX_BACKGROUND * 75 / 100) |
27 | 28 | ||
29 | /** Bias for fi->writectr, meaning new writepages must not be sent */ | ||
30 | #define FUSE_NOWRITE INT_MIN | ||
31 | |||
28 | /** It could be as large as PATH_MAX, but would that have any uses? */ | 32 | /** It could be as large as PATH_MAX, but would that have any uses? */ |
29 | #define FUSE_NAME_MAX 1024 | 33 | #define FUSE_NAME_MAX 1024 |
30 | 34 | ||
@@ -73,6 +77,19 @@ struct fuse_inode { | |||
73 | 77 | ||
74 | /** Files usable in writepage. Protected by fc->lock */ | 78 | /** Files usable in writepage. Protected by fc->lock */ |
75 | struct list_head write_files; | 79 | struct list_head write_files; |
80 | |||
81 | /** Writepages pending on truncate or fsync */ | ||
82 | struct list_head queued_writes; | ||
83 | |||
84 | /** Number of sent writes, a negative bias (FUSE_NOWRITE) | ||
85 | * means more writes are blocked */ | ||
86 | int writectr; | ||
87 | |||
88 | /** Waitq for writepage completion */ | ||
89 | wait_queue_head_t page_waitq; | ||
90 | |||
91 | /** List of writepage requests (pending or sent) */ | ||
92 | struct list_head writepages; | ||
76 | }; | 93 | }; |
77 | 94 | ||
78 | /** FUSE specific file data */ | 95 | /** FUSE specific file data */ |
@@ -242,6 +259,12 @@ struct fuse_req { | |||
242 | /** File used in the request (or NULL) */ | 259 | /** File used in the request (or NULL) */ |
243 | struct fuse_file *ff; | 260 | struct fuse_file *ff; |
244 | 261 | ||
262 | /** Inode used in the request or NULL */ | ||
263 | struct inode *inode; | ||
264 | |||
265 | /** Link on fi->writepages */ | ||
266 | struct list_head writepages_entry; | ||
267 | |||
245 | /** Request completion callback */ | 268 | /** Request completion callback */ |
246 | void (*end)(struct fuse_conn *, struct fuse_req *); | 269 | void (*end)(struct fuse_conn *, struct fuse_req *); |
247 | 270 | ||
@@ -504,6 +527,11 @@ void fuse_init_symlink(struct inode *inode); | |||
504 | void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, | 527 | void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, |
505 | u64 attr_valid, u64 attr_version); | 528 | u64 attr_valid, u64 attr_version); |
506 | 529 | ||
530 | void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, | ||
531 | u64 attr_valid); | ||
532 | |||
533 | void fuse_truncate(struct address_space *mapping, loff_t offset); | ||
534 | |||
507 | /** | 535 | /** |
508 | * Initialize the client device | 536 | * Initialize the client device |
509 | */ | 537 | */ |
@@ -522,6 +550,8 @@ void fuse_ctl_cleanup(void); | |||
522 | */ | 550 | */ |
523 | struct fuse_req *fuse_request_alloc(void); | 551 | struct fuse_req *fuse_request_alloc(void); |
524 | 552 | ||
553 | struct fuse_req *fuse_request_alloc_nofs(void); | ||
554 | |||
525 | /** | 555 | /** |
526 | * Free a request | 556 | * Free a request |
527 | */ | 557 | */ |
@@ -558,6 +588,8 @@ void request_send_noreply(struct fuse_conn *fc, struct fuse_req *req); | |||
558 | */ | 588 | */ |
559 | void request_send_background(struct fuse_conn *fc, struct fuse_req *req); | 589 | void request_send_background(struct fuse_conn *fc, struct fuse_req *req); |
560 | 590 | ||
591 | void request_send_background_locked(struct fuse_conn *fc, struct fuse_req *req); | ||
592 | |||
561 | /* Abort all requests */ | 593 | /* Abort all requests */ |
562 | void fuse_abort_conn(struct fuse_conn *fc); | 594 | void fuse_abort_conn(struct fuse_conn *fc); |
563 | 595 | ||
@@ -600,3 +632,8 @@ u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id); | |||
600 | 632 | ||
601 | int fuse_update_attributes(struct inode *inode, struct kstat *stat, | 633 | int fuse_update_attributes(struct inode *inode, struct kstat *stat, |
602 | struct file *file, bool *refreshed); | 634 | struct file *file, bool *refreshed); |
635 | |||
636 | void fuse_flush_writepages(struct inode *inode); | ||
637 | |||
638 | void fuse_set_nowrite(struct inode *inode); | ||
639 | void fuse_release_nowrite(struct inode *inode); | ||
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index c4fcfd59cd80..7d01c68852a8 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c | |||
@@ -59,7 +59,11 @@ static struct inode *fuse_alloc_inode(struct super_block *sb) | |||
59 | fi->nodeid = 0; | 59 | fi->nodeid = 0; |
60 | fi->nlookup = 0; | 60 | fi->nlookup = 0; |
61 | fi->attr_version = 0; | 61 | fi->attr_version = 0; |
62 | fi->writectr = 0; | ||
62 | INIT_LIST_HEAD(&fi->write_files); | 63 | INIT_LIST_HEAD(&fi->write_files); |
64 | INIT_LIST_HEAD(&fi->queued_writes); | ||
65 | INIT_LIST_HEAD(&fi->writepages); | ||
66 | init_waitqueue_head(&fi->page_waitq); | ||
63 | fi->forget_req = fuse_request_alloc(); | 67 | fi->forget_req = fuse_request_alloc(); |
64 | if (!fi->forget_req) { | 68 | if (!fi->forget_req) { |
65 | kmem_cache_free(fuse_inode_cachep, inode); | 69 | kmem_cache_free(fuse_inode_cachep, inode); |
@@ -73,6 +77,7 @@ static void fuse_destroy_inode(struct inode *inode) | |||
73 | { | 77 | { |
74 | struct fuse_inode *fi = get_fuse_inode(inode); | 78 | struct fuse_inode *fi = get_fuse_inode(inode); |
75 | BUG_ON(!list_empty(&fi->write_files)); | 79 | BUG_ON(!list_empty(&fi->write_files)); |
80 | BUG_ON(!list_empty(&fi->queued_writes)); | ||
76 | if (fi->forget_req) | 81 | if (fi->forget_req) |
77 | fuse_request_free(fi->forget_req); | 82 | fuse_request_free(fi->forget_req); |
78 | kmem_cache_free(fuse_inode_cachep, inode); | 83 | kmem_cache_free(fuse_inode_cachep, inode); |
@@ -109,7 +114,7 @@ static int fuse_remount_fs(struct super_block *sb, int *flags, char *data) | |||
109 | return 0; | 114 | return 0; |
110 | } | 115 | } |
111 | 116 | ||
112 | static void fuse_truncate(struct address_space *mapping, loff_t offset) | 117 | void fuse_truncate(struct address_space *mapping, loff_t offset) |
113 | { | 118 | { |
114 | /* See vmtruncate() */ | 119 | /* See vmtruncate() */ |
115 | unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1); | 120 | unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1); |
@@ -117,19 +122,12 @@ static void fuse_truncate(struct address_space *mapping, loff_t offset) | |||
117 | unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1); | 122 | unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1); |
118 | } | 123 | } |
119 | 124 | ||
120 | 125 | void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, | |
121 | void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, | 126 | u64 attr_valid) |
122 | u64 attr_valid, u64 attr_version) | ||
123 | { | 127 | { |
124 | struct fuse_conn *fc = get_fuse_conn(inode); | 128 | struct fuse_conn *fc = get_fuse_conn(inode); |
125 | struct fuse_inode *fi = get_fuse_inode(inode); | 129 | struct fuse_inode *fi = get_fuse_inode(inode); |
126 | loff_t oldsize; | ||
127 | 130 | ||
128 | spin_lock(&fc->lock); | ||
129 | if (attr_version != 0 && fi->attr_version > attr_version) { | ||
130 | spin_unlock(&fc->lock); | ||
131 | return; | ||
132 | } | ||
133 | fi->attr_version = ++fc->attr_version; | 131 | fi->attr_version = ++fc->attr_version; |
134 | fi->i_time = attr_valid; | 132 | fi->i_time = attr_valid; |
135 | 133 | ||
@@ -159,6 +157,22 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, | |||
159 | fi->orig_i_mode = inode->i_mode; | 157 | fi->orig_i_mode = inode->i_mode; |
160 | if (!(fc->flags & FUSE_DEFAULT_PERMISSIONS)) | 158 | if (!(fc->flags & FUSE_DEFAULT_PERMISSIONS)) |
161 | inode->i_mode &= ~S_ISVTX; | 159 | inode->i_mode &= ~S_ISVTX; |
160 | } | ||
161 | |||
162 | void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, | ||
163 | u64 attr_valid, u64 attr_version) | ||
164 | { | ||
165 | struct fuse_conn *fc = get_fuse_conn(inode); | ||
166 | struct fuse_inode *fi = get_fuse_inode(inode); | ||
167 | loff_t oldsize; | ||
168 | |||
169 | spin_lock(&fc->lock); | ||
170 | if (attr_version != 0 && fi->attr_version > attr_version) { | ||
171 | spin_unlock(&fc->lock); | ||
172 | return; | ||
173 | } | ||
174 | |||
175 | fuse_change_attributes_common(inode, attr, attr_valid); | ||
162 | 176 | ||
163 | oldsize = inode->i_size; | 177 | oldsize = inode->i_size; |
164 | i_size_write(inode, attr->size); | 178 | i_size_write(inode, attr->size); |
@@ -468,6 +482,8 @@ static struct fuse_conn *new_conn(struct super_block *sb) | |||
468 | atomic_set(&fc->num_waiting, 0); | 482 | atomic_set(&fc->num_waiting, 0); |
469 | fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; | 483 | fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; |
470 | fc->bdi.unplug_io_fn = default_unplug_io_fn; | 484 | fc->bdi.unplug_io_fn = default_unplug_io_fn; |
485 | /* fuse does its own writeback accounting */ | ||
486 | fc->bdi.capabilities = BDI_CAP_NO_ACCT_WB; | ||
471 | fc->dev = sb->s_dev; | 487 | fc->dev = sb->s_dev; |
472 | err = bdi_init(&fc->bdi); | 488 | err = bdi_init(&fc->bdi); |
473 | if (err) | 489 | if (err) |
@@ -475,6 +491,19 @@ static struct fuse_conn *new_conn(struct super_block *sb) | |||
475 | err = bdi_register_dev(&fc->bdi, fc->dev); | 491 | err = bdi_register_dev(&fc->bdi, fc->dev); |
476 | if (err) | 492 | if (err) |
477 | goto error_bdi_destroy; | 493 | goto error_bdi_destroy; |
494 | /* | ||
495 | * For a single fuse filesystem use max 1% of dirty + | ||
496 | * writeback threshold. | ||
497 | * | ||
498 | * This gives about 1M of write buffer for memory maps on a | ||
499 | * machine with 1G and 10% dirty_ratio, which should be more | ||
500 | * than enough. | ||
501 | * | ||
502 | * Privileged users can raise it by writing to | ||
503 | * | ||
504 | * /sys/class/bdi/<bdi>/max_ratio | ||
505 | */ | ||
506 | bdi_set_max_ratio(&fc->bdi, 1); | ||
478 | fc->reqctr = 0; | 507 | fc->reqctr = 0; |
479 | fc->blocked = 1; | 508 | fc->blocked = 1; |
480 | fc->attr_version = 1; | 509 | fc->attr_version = 1; |