diff options
author | Sage Weil <sage@newdream.net> | 2010-04-01 00:52:10 -0400 |
---|---|---|
committer | Sage Weil <sage@newdream.net> | 2010-04-01 12:34:36 -0400 |
commit | 80e755fedebc8de0599a79efad2c656503df2e62 (patch) | |
tree | 05f0285f4d040958377d6542394bc56b10f80257 /fs/ceph/addr.c | |
parent | 9358c6d4c0264b1572554c49c4b92673ea9a5c72 (diff) |
ceph: allow writeback of snapped pages older than 'oldest' snapc
On snap deletion, we don't regenerate ceph_cap_snaps for inodes with dirty
pages because deletion does not affect metadata writeback. However, we
did run into problems when we went to write back the pages because the
'oldest' snapc is determined by the oldest cap_snap, and that may be the
newer snapc that reflects the deletion. This caused confusion and an
infinite loop in ceph_update_writeable_page().
Change the snapc checks to allow writeback of any snapc that is equal to
OR older than the 'oldest' snapc.
When there are no cap_snaps, we were also using the realm's latest snapc
for writeback, which complicates ceph_put_wrbuffer_cap_refs(). Instead,
use i_head_snapc, the snapc used for the most recent ('head') data.
This makes the writeback snapc (ceph_osd_request.r_snapc) _always_ match a
capsnap or i_head_snapc.
Also, in writepages_finish(), drop the snapc referenced by the _page_
and do not assume it matches the request snapc (it may not anymore).
Signed-off-by: Sage Weil <sage@newdream.net>
Diffstat (limited to 'fs/ceph/addr.c')
-rw-r--r-- | fs/ceph/addr.c | 27 |
1 files changed, 14 insertions, 13 deletions
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index ce8ef610772..a313e9baeed 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c | |||
@@ -356,8 +356,8 @@ static struct ceph_snap_context *__get_oldest_context(struct inode *inode, | |||
356 | break; | 356 | break; |
357 | } | 357 | } |
358 | } | 358 | } |
359 | if (!snapc && ci->i_snap_realm) { | 359 | if (!snapc && ci->i_head_snapc) { |
360 | snapc = ceph_get_snap_context(ci->i_snap_realm->cached_context); | 360 | snapc = ceph_get_snap_context(ci->i_head_snapc); |
361 | dout(" head snapc %p has %d dirty pages\n", | 361 | dout(" head snapc %p has %d dirty pages\n", |
362 | snapc, ci->i_wrbuffer_ref_head); | 362 | snapc, ci->i_wrbuffer_ref_head); |
363 | } | 363 | } |
@@ -412,7 +412,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) | |||
412 | dout("writepage %p page %p not dirty?\n", inode, page); | 412 | dout("writepage %p page %p not dirty?\n", inode, page); |
413 | goto out; | 413 | goto out; |
414 | } | 414 | } |
415 | if (snapc != get_oldest_context(inode, &snap_size)) { | 415 | if (snapc->seq > get_oldest_context(inode, &snap_size)->seq) { |
416 | dout("writepage %p page %p snapc %p not writeable - noop\n", | 416 | dout("writepage %p page %p snapc %p not writeable - noop\n", |
417 | inode, page, (void *)page->private); | 417 | inode, page, (void *)page->private); |
418 | /* we should only noop if called by kswapd */ | 418 | /* we should only noop if called by kswapd */ |
@@ -557,9 +557,9 @@ static void writepages_finish(struct ceph_osd_request *req, | |||
557 | dout("inode %p skipping page %p\n", inode, page); | 557 | dout("inode %p skipping page %p\n", inode, page); |
558 | wbc->pages_skipped++; | 558 | wbc->pages_skipped++; |
559 | } | 559 | } |
560 | ceph_put_snap_context((void *)page->private); | ||
560 | page->private = 0; | 561 | page->private = 0; |
561 | ClearPagePrivate(page); | 562 | ClearPagePrivate(page); |
562 | ceph_put_snap_context(snapc); | ||
563 | dout("unlocking %d %p\n", i, page); | 563 | dout("unlocking %d %p\n", i, page); |
564 | end_page_writeback(page); | 564 | end_page_writeback(page); |
565 | 565 | ||
@@ -617,7 +617,7 @@ static int ceph_writepages_start(struct address_space *mapping, | |||
617 | int range_whole = 0; | 617 | int range_whole = 0; |
618 | int should_loop = 1; | 618 | int should_loop = 1; |
619 | pgoff_t max_pages = 0, max_pages_ever = 0; | 619 | pgoff_t max_pages = 0, max_pages_ever = 0; |
620 | struct ceph_snap_context *snapc = NULL, *last_snapc = NULL; | 620 | struct ceph_snap_context *snapc = NULL, *last_snapc = NULL, *pgsnapc; |
621 | struct pagevec pvec; | 621 | struct pagevec pvec; |
622 | int done = 0; | 622 | int done = 0; |
623 | int rc = 0; | 623 | int rc = 0; |
@@ -769,9 +769,10 @@ get_more_pages: | |||
769 | } | 769 | } |
770 | 770 | ||
771 | /* only if matching snap context */ | 771 | /* only if matching snap context */ |
772 | if (snapc != (void *)page->private) { | 772 | pgsnapc = (void *)page->private; |
773 | dout("page snapc %p != oldest %p\n", | 773 | if (pgsnapc->seq > snapc->seq) { |
774 | (void *)page->private, snapc); | 774 | dout("page snapc %p %lld > oldest %p %lld\n", |
775 | pgsnapc, pgsnapc->seq, snapc, snapc->seq); | ||
775 | unlock_page(page); | 776 | unlock_page(page); |
776 | if (!locked_pages) | 777 | if (!locked_pages) |
777 | continue; /* keep looking for snap */ | 778 | continue; /* keep looking for snap */ |
@@ -935,8 +936,8 @@ static int ceph_update_writeable_page(struct file *file, | |||
935 | int pos_in_page = pos & ~PAGE_CACHE_MASK; | 936 | int pos_in_page = pos & ~PAGE_CACHE_MASK; |
936 | int end_in_page = pos_in_page + len; | 937 | int end_in_page = pos_in_page + len; |
937 | loff_t i_size; | 938 | loff_t i_size; |
938 | struct ceph_snap_context *snapc; | ||
939 | int r; | 939 | int r; |
940 | struct ceph_snap_context *snapc, *oldest; | ||
940 | 941 | ||
941 | retry_locked: | 942 | retry_locked: |
942 | /* writepages currently holds page lock, but if we change that later, */ | 943 | /* writepages currently holds page lock, but if we change that later, */ |
@@ -946,16 +947,16 @@ retry_locked: | |||
946 | BUG_ON(!ci->i_snap_realm); | 947 | BUG_ON(!ci->i_snap_realm); |
947 | down_read(&mdsc->snap_rwsem); | 948 | down_read(&mdsc->snap_rwsem); |
948 | BUG_ON(!ci->i_snap_realm->cached_context); | 949 | BUG_ON(!ci->i_snap_realm->cached_context); |
949 | if (page->private && | 950 | snapc = (void *)page->private; |
950 | (void *)page->private != ci->i_snap_realm->cached_context) { | 951 | if (snapc && snapc != ci->i_head_snapc) { |
951 | /* | 952 | /* |
952 | * this page is already dirty in another (older) snap | 953 | * this page is already dirty in another (older) snap |
953 | * context! is it writeable now? | 954 | * context! is it writeable now? |
954 | */ | 955 | */ |
955 | snapc = get_oldest_context(inode, NULL); | 956 | oldest = get_oldest_context(inode, NULL); |
956 | up_read(&mdsc->snap_rwsem); | 957 | up_read(&mdsc->snap_rwsem); |
957 | 958 | ||
958 | if (snapc != (void *)page->private) { | 959 | if (snapc->seq > oldest->seq) { |
959 | dout(" page %p snapc %p not current or oldest\n", | 960 | dout(" page %p snapc %p not current or oldest\n", |
960 | page, (void *)page->private); | 961 | page, (void *)page->private); |
961 | /* | 962 | /* |