about | summary | refs | log | tree | commit | diff | stats
path: root/fs/ceph/addr.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ceph/addr.c')
-rw-r--r-- fs/ceph/addr.c | 73
1 file changed, 39 insertions, 34 deletions
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 23bb0ceabe31..412593703d1e 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -5,6 +5,7 @@
5#include <linux/mm.h> 5#include <linux/mm.h>
6#include <linux/pagemap.h> 6#include <linux/pagemap.h>
7#include <linux/writeback.h> /* generic_writepages */ 7#include <linux/writeback.h> /* generic_writepages */
8#include <linux/slab.h>
8#include <linux/pagevec.h> 9#include <linux/pagevec.h>
9#include <linux/task_io_accounting_ops.h> 10#include <linux/task_io_accounting_ops.h>
10 11
@@ -336,16 +337,15 @@ out:
336/* 337/*
337 * Get ref for the oldest snapc for an inode with dirty data... that is, the 338 * Get ref for the oldest snapc for an inode with dirty data... that is, the
338 * only snap context we are allowed to write back. 339 * only snap context we are allowed to write back.
339 *
340 * Caller holds i_lock.
341 */ 340 */
342static struct ceph_snap_context *__get_oldest_context(struct inode *inode, 341static struct ceph_snap_context *get_oldest_context(struct inode *inode,
343 u64 *snap_size) 342 u64 *snap_size)
344{ 343{
345 struct ceph_inode_info *ci = ceph_inode(inode); 344 struct ceph_inode_info *ci = ceph_inode(inode);
346 struct ceph_snap_context *snapc = NULL; 345 struct ceph_snap_context *snapc = NULL;
347 struct ceph_cap_snap *capsnap = NULL; 346 struct ceph_cap_snap *capsnap = NULL;
348 347
348 spin_lock(&inode->i_lock);
349 list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) { 349 list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) {
350 dout(" cap_snap %p snapc %p has %d dirty pages\n", capsnap, 350 dout(" cap_snap %p snapc %p has %d dirty pages\n", capsnap,
351 capsnap->context, capsnap->dirty_pages); 351 capsnap->context, capsnap->dirty_pages);
@@ -356,21 +356,11 @@ static struct ceph_snap_context *__get_oldest_context(struct inode *inode,
356 break; 356 break;
357 } 357 }
358 } 358 }
359 if (!snapc && ci->i_snap_realm) { 359 if (!snapc && ci->i_head_snapc) {
360 snapc = ceph_get_snap_context(ci->i_snap_realm->cached_context); 360 snapc = ceph_get_snap_context(ci->i_head_snapc);
361 dout(" head snapc %p has %d dirty pages\n", 361 dout(" head snapc %p has %d dirty pages\n",
362 snapc, ci->i_wrbuffer_ref_head); 362 snapc, ci->i_wrbuffer_ref_head);
363 } 363 }
364 return snapc;
365}
366
367static struct ceph_snap_context *get_oldest_context(struct inode *inode,
368 u64 *snap_size)
369{
370 struct ceph_snap_context *snapc = NULL;
371
372 spin_lock(&inode->i_lock);
373 snapc = __get_oldest_context(inode, snap_size);
374 spin_unlock(&inode->i_lock); 364 spin_unlock(&inode->i_lock);
375 return snapc; 365 return snapc;
376} 366}
@@ -391,7 +381,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
391 int len = PAGE_CACHE_SIZE; 381 int len = PAGE_CACHE_SIZE;
392 loff_t i_size; 382 loff_t i_size;
393 int err = 0; 383 int err = 0;
394 struct ceph_snap_context *snapc; 384 struct ceph_snap_context *snapc, *oldest;
395 u64 snap_size = 0; 385 u64 snap_size = 0;
396 long writeback_stat; 386 long writeback_stat;
397 387
@@ -412,13 +402,16 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
412 dout("writepage %p page %p not dirty?\n", inode, page); 402 dout("writepage %p page %p not dirty?\n", inode, page);
413 goto out; 403 goto out;
414 } 404 }
415 if (snapc != get_oldest_context(inode, &snap_size)) { 405 oldest = get_oldest_context(inode, &snap_size);
406 if (snapc->seq > oldest->seq) {
416 dout("writepage %p page %p snapc %p not writeable - noop\n", 407 dout("writepage %p page %p snapc %p not writeable - noop\n",
417 inode, page, (void *)page->private); 408 inode, page, (void *)page->private);
418 /* we should only noop if called by kswapd */ 409 /* we should only noop if called by kswapd */
419 WARN_ON((current->flags & PF_MEMALLOC) == 0); 410 WARN_ON((current->flags & PF_MEMALLOC) == 0);
411 ceph_put_snap_context(oldest);
420 goto out; 412 goto out;
421 } 413 }
414 ceph_put_snap_context(oldest);
422 415
423 /* is this a partial page at end of file? */ 416 /* is this a partial page at end of file? */
424 if (snap_size) 417 if (snap_size)
@@ -457,7 +450,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
457 ClearPagePrivate(page); 450 ClearPagePrivate(page);
458 end_page_writeback(page); 451 end_page_writeback(page);
459 ceph_put_wrbuffer_cap_refs(ci, 1, snapc); 452 ceph_put_wrbuffer_cap_refs(ci, 1, snapc);
460 ceph_put_snap_context(snapc); 453 ceph_put_snap_context(snapc); /* page's reference */
461out: 454out:
462 return err; 455 return err;
463} 456}
@@ -557,9 +550,9 @@ static void writepages_finish(struct ceph_osd_request *req,
557 dout("inode %p skipping page %p\n", inode, page); 550 dout("inode %p skipping page %p\n", inode, page);
558 wbc->pages_skipped++; 551 wbc->pages_skipped++;
559 } 552 }
553 ceph_put_snap_context((void *)page->private);
560 page->private = 0; 554 page->private = 0;
561 ClearPagePrivate(page); 555 ClearPagePrivate(page);
562 ceph_put_snap_context(snapc);
563 dout("unlocking %d %p\n", i, page); 556 dout("unlocking %d %p\n", i, page);
564 end_page_writeback(page); 557 end_page_writeback(page);
565 558
@@ -617,7 +610,7 @@ static int ceph_writepages_start(struct address_space *mapping,
617 int range_whole = 0; 610 int range_whole = 0;
618 int should_loop = 1; 611 int should_loop = 1;
619 pgoff_t max_pages = 0, max_pages_ever = 0; 612 pgoff_t max_pages = 0, max_pages_ever = 0;
620 struct ceph_snap_context *snapc = NULL, *last_snapc = NULL; 613 struct ceph_snap_context *snapc = NULL, *last_snapc = NULL, *pgsnapc;
621 struct pagevec pvec; 614 struct pagevec pvec;
622 int done = 0; 615 int done = 0;
623 int rc = 0; 616 int rc = 0;
@@ -769,9 +762,10 @@ get_more_pages:
769 } 762 }
770 763
771 /* only if matching snap context */ 764 /* only if matching snap context */
772 if (snapc != (void *)page->private) { 765 pgsnapc = (void *)page->private;
773 dout("page snapc %p != oldest %p\n", 766 if (pgsnapc->seq > snapc->seq) {
774 (void *)page->private, snapc); 767 dout("page snapc %p %lld > oldest %p %lld\n",
768 pgsnapc, pgsnapc->seq, snapc, snapc->seq);
775 unlock_page(page); 769 unlock_page(page);
776 if (!locked_pages) 770 if (!locked_pages)
777 continue; /* keep looking for snap */ 771 continue; /* keep looking for snap */
@@ -913,12 +907,19 @@ static int context_is_writeable_or_written(struct inode *inode,
913 struct ceph_snap_context *snapc) 907 struct ceph_snap_context *snapc)
914{ 908{
915 struct ceph_snap_context *oldest = get_oldest_context(inode, NULL); 909 struct ceph_snap_context *oldest = get_oldest_context(inode, NULL);
916 return !oldest || snapc->seq <= oldest->seq; 910 int ret = !oldest || snapc->seq <= oldest->seq;
911
912 ceph_put_snap_context(oldest);
913 return ret;
917} 914}
918 915
919/* 916/*
920 * We are only allowed to write into/dirty the page if the page is 917 * We are only allowed to write into/dirty the page if the page is
921 * clean, or already dirty within the same snap context. 918 * clean, or already dirty within the same snap context.
919 *
920 * called with page locked.
921 * return success with page locked,
922 * or any failure (incl -EAGAIN) with page unlocked.
922 */ 923 */
923static int ceph_update_writeable_page(struct file *file, 924static int ceph_update_writeable_page(struct file *file,
924 loff_t pos, unsigned len, 925 loff_t pos, unsigned len,
@@ -931,8 +932,8 @@ static int ceph_update_writeable_page(struct file *file,
931 int pos_in_page = pos & ~PAGE_CACHE_MASK; 932 int pos_in_page = pos & ~PAGE_CACHE_MASK;
932 int end_in_page = pos_in_page + len; 933 int end_in_page = pos_in_page + len;
933 loff_t i_size; 934 loff_t i_size;
934 struct ceph_snap_context *snapc;
935 int r; 935 int r;
936 struct ceph_snap_context *snapc, *oldest;
936 937
937retry_locked: 938retry_locked:
938 /* writepages currently holds page lock, but if we change that later, */ 939 /* writepages currently holds page lock, but if we change that later, */
@@ -942,30 +943,34 @@ retry_locked:
942 BUG_ON(!ci->i_snap_realm); 943 BUG_ON(!ci->i_snap_realm);
943 down_read(&mdsc->snap_rwsem); 944 down_read(&mdsc->snap_rwsem);
944 BUG_ON(!ci->i_snap_realm->cached_context); 945 BUG_ON(!ci->i_snap_realm->cached_context);
945 if (page->private && 946 snapc = (void *)page->private;
946 (void *)page->private != ci->i_snap_realm->cached_context) { 947 if (snapc && snapc != ci->i_head_snapc) {
947 /* 948 /*
948 * this page is already dirty in another (older) snap 949 * this page is already dirty in another (older) snap
949 * context! is it writeable now? 950 * context! is it writeable now?
950 */ 951 */
951 snapc = get_oldest_context(inode, NULL); 952 oldest = get_oldest_context(inode, NULL);
952 up_read(&mdsc->snap_rwsem); 953 up_read(&mdsc->snap_rwsem);
953 954
954 if (snapc != (void *)page->private) { 955 if (snapc->seq > oldest->seq) {
956 ceph_put_snap_context(oldest);
955 dout(" page %p snapc %p not current or oldest\n", 957 dout(" page %p snapc %p not current or oldest\n",
956 page, (void *)page->private); 958 page, snapc);
957 /* 959 /*
958 * queue for writeback, and wait for snapc to 960 * queue for writeback, and wait for snapc to
959 * be writeable or written 961 * be writeable or written
960 */ 962 */
961 snapc = ceph_get_snap_context((void *)page->private); 963 snapc = ceph_get_snap_context(snapc);
962 unlock_page(page); 964 unlock_page(page);
963 ceph_queue_writeback(inode); 965 ceph_queue_writeback(inode);
964 wait_event_interruptible(ci->i_cap_wq, 966 r = wait_event_interruptible(ci->i_cap_wq,
965 context_is_writeable_or_written(inode, snapc)); 967 context_is_writeable_or_written(inode, snapc));
966 ceph_put_snap_context(snapc); 968 ceph_put_snap_context(snapc);
969 if (r == -ERESTARTSYS)
970 return r;
967 return -EAGAIN; 971 return -EAGAIN;
968 } 972 }
973 ceph_put_snap_context(oldest);
969 974
970 /* yay, writeable, do it now (without dropping page lock) */ 975 /* yay, writeable, do it now (without dropping page lock) */
971 dout(" page %p snapc %p not current, but oldest\n", 976 dout(" page %p snapc %p not current, but oldest\n",
@@ -1035,7 +1040,7 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping,
1035 int r; 1040 int r;
1036 1041
1037 do { 1042 do {
1038 /* get a page*/ 1043 /* get a page */
1039 page = grab_cache_page_write_begin(mapping, index, 0); 1044 page = grab_cache_page_write_begin(mapping, index, 0);
1040 if (!page) 1045 if (!page)
1041 return -ENOMEM; 1046 return -ENOMEM;