diff options
Diffstat (limited to 'fs/ceph/addr.c')
-rw-r--r-- | fs/ceph/addr.c | 73 |
1 files changed, 39 insertions, 34 deletions
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 23bb0ceabe31..412593703d1e 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c | |||
@@ -5,6 +5,7 @@ | |||
5 | #include <linux/mm.h> | 5 | #include <linux/mm.h> |
6 | #include <linux/pagemap.h> | 6 | #include <linux/pagemap.h> |
7 | #include <linux/writeback.h> /* generic_writepages */ | 7 | #include <linux/writeback.h> /* generic_writepages */ |
8 | #include <linux/slab.h> | ||
8 | #include <linux/pagevec.h> | 9 | #include <linux/pagevec.h> |
9 | #include <linux/task_io_accounting_ops.h> | 10 | #include <linux/task_io_accounting_ops.h> |
10 | 11 | ||
@@ -336,16 +337,15 @@ out: | |||
336 | /* | 337 | /* |
337 | * Get ref for the oldest snapc for an inode with dirty data... that is, the | 338 | * Get ref for the oldest snapc for an inode with dirty data... that is, the |
338 | * only snap context we are allowed to write back. | 339 | * only snap context we are allowed to write back. |
339 | * | ||
340 | * Caller holds i_lock. | ||
341 | */ | 340 | */ |
342 | static struct ceph_snap_context *__get_oldest_context(struct inode *inode, | 341 | static struct ceph_snap_context *get_oldest_context(struct inode *inode, |
343 | u64 *snap_size) | 342 | u64 *snap_size) |
344 | { | 343 | { |
345 | struct ceph_inode_info *ci = ceph_inode(inode); | 344 | struct ceph_inode_info *ci = ceph_inode(inode); |
346 | struct ceph_snap_context *snapc = NULL; | 345 | struct ceph_snap_context *snapc = NULL; |
347 | struct ceph_cap_snap *capsnap = NULL; | 346 | struct ceph_cap_snap *capsnap = NULL; |
348 | 347 | ||
348 | spin_lock(&inode->i_lock); | ||
349 | list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) { | 349 | list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) { |
350 | dout(" cap_snap %p snapc %p has %d dirty pages\n", capsnap, | 350 | dout(" cap_snap %p snapc %p has %d dirty pages\n", capsnap, |
351 | capsnap->context, capsnap->dirty_pages); | 351 | capsnap->context, capsnap->dirty_pages); |
@@ -356,21 +356,11 @@ static struct ceph_snap_context *__get_oldest_context(struct inode *inode, | |||
356 | break; | 356 | break; |
357 | } | 357 | } |
358 | } | 358 | } |
359 | if (!snapc && ci->i_snap_realm) { | 359 | if (!snapc && ci->i_head_snapc) { |
360 | snapc = ceph_get_snap_context(ci->i_snap_realm->cached_context); | 360 | snapc = ceph_get_snap_context(ci->i_head_snapc); |
361 | dout(" head snapc %p has %d dirty pages\n", | 361 | dout(" head snapc %p has %d dirty pages\n", |
362 | snapc, ci->i_wrbuffer_ref_head); | 362 | snapc, ci->i_wrbuffer_ref_head); |
363 | } | 363 | } |
364 | return snapc; | ||
365 | } | ||
366 | |||
367 | static struct ceph_snap_context *get_oldest_context(struct inode *inode, | ||
368 | u64 *snap_size) | ||
369 | { | ||
370 | struct ceph_snap_context *snapc = NULL; | ||
371 | |||
372 | spin_lock(&inode->i_lock); | ||
373 | snapc = __get_oldest_context(inode, snap_size); | ||
374 | spin_unlock(&inode->i_lock); | 364 | spin_unlock(&inode->i_lock); |
375 | return snapc; | 365 | return snapc; |
376 | } | 366 | } |
@@ -391,7 +381,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) | |||
391 | int len = PAGE_CACHE_SIZE; | 381 | int len = PAGE_CACHE_SIZE; |
392 | loff_t i_size; | 382 | loff_t i_size; |
393 | int err = 0; | 383 | int err = 0; |
394 | struct ceph_snap_context *snapc; | 384 | struct ceph_snap_context *snapc, *oldest; |
395 | u64 snap_size = 0; | 385 | u64 snap_size = 0; |
396 | long writeback_stat; | 386 | long writeback_stat; |
397 | 387 | ||
@@ -412,13 +402,16 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) | |||
412 | dout("writepage %p page %p not dirty?\n", inode, page); | 402 | dout("writepage %p page %p not dirty?\n", inode, page); |
413 | goto out; | 403 | goto out; |
414 | } | 404 | } |
415 | if (snapc != get_oldest_context(inode, &snap_size)) { | 405 | oldest = get_oldest_context(inode, &snap_size); |
406 | if (snapc->seq > oldest->seq) { | ||
416 | dout("writepage %p page %p snapc %p not writeable - noop\n", | 407 | dout("writepage %p page %p snapc %p not writeable - noop\n", |
417 | inode, page, (void *)page->private); | 408 | inode, page, (void *)page->private); |
418 | /* we should only noop if called by kswapd */ | 409 | /* we should only noop if called by kswapd */ |
419 | WARN_ON((current->flags & PF_MEMALLOC) == 0); | 410 | WARN_ON((current->flags & PF_MEMALLOC) == 0); |
411 | ceph_put_snap_context(oldest); | ||
420 | goto out; | 412 | goto out; |
421 | } | 413 | } |
414 | ceph_put_snap_context(oldest); | ||
422 | 415 | ||
423 | /* is this a partial page at end of file? */ | 416 | /* is this a partial page at end of file? */ |
424 | if (snap_size) | 417 | if (snap_size) |
@@ -457,7 +450,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) | |||
457 | ClearPagePrivate(page); | 450 | ClearPagePrivate(page); |
458 | end_page_writeback(page); | 451 | end_page_writeback(page); |
459 | ceph_put_wrbuffer_cap_refs(ci, 1, snapc); | 452 | ceph_put_wrbuffer_cap_refs(ci, 1, snapc); |
460 | ceph_put_snap_context(snapc); | 453 | ceph_put_snap_context(snapc); /* page's reference */ |
461 | out: | 454 | out: |
462 | return err; | 455 | return err; |
463 | } | 456 | } |
@@ -557,9 +550,9 @@ static void writepages_finish(struct ceph_osd_request *req, | |||
557 | dout("inode %p skipping page %p\n", inode, page); | 550 | dout("inode %p skipping page %p\n", inode, page); |
558 | wbc->pages_skipped++; | 551 | wbc->pages_skipped++; |
559 | } | 552 | } |
553 | ceph_put_snap_context((void *)page->private); | ||
560 | page->private = 0; | 554 | page->private = 0; |
561 | ClearPagePrivate(page); | 555 | ClearPagePrivate(page); |
562 | ceph_put_snap_context(snapc); | ||
563 | dout("unlocking %d %p\n", i, page); | 556 | dout("unlocking %d %p\n", i, page); |
564 | end_page_writeback(page); | 557 | end_page_writeback(page); |
565 | 558 | ||
@@ -617,7 +610,7 @@ static int ceph_writepages_start(struct address_space *mapping, | |||
617 | int range_whole = 0; | 610 | int range_whole = 0; |
618 | int should_loop = 1; | 611 | int should_loop = 1; |
619 | pgoff_t max_pages = 0, max_pages_ever = 0; | 612 | pgoff_t max_pages = 0, max_pages_ever = 0; |
620 | struct ceph_snap_context *snapc = NULL, *last_snapc = NULL; | 613 | struct ceph_snap_context *snapc = NULL, *last_snapc = NULL, *pgsnapc; |
621 | struct pagevec pvec; | 614 | struct pagevec pvec; |
622 | int done = 0; | 615 | int done = 0; |
623 | int rc = 0; | 616 | int rc = 0; |
@@ -769,9 +762,10 @@ get_more_pages: | |||
769 | } | 762 | } |
770 | 763 | ||
771 | /* only if matching snap context */ | 764 | /* only if matching snap context */ |
772 | if (snapc != (void *)page->private) { | 765 | pgsnapc = (void *)page->private; |
773 | dout("page snapc %p != oldest %p\n", | 766 | if (pgsnapc->seq > snapc->seq) { |
774 | (void *)page->private, snapc); | 767 | dout("page snapc %p %lld > oldest %p %lld\n", |
768 | pgsnapc, pgsnapc->seq, snapc, snapc->seq); | ||
775 | unlock_page(page); | 769 | unlock_page(page); |
776 | if (!locked_pages) | 770 | if (!locked_pages) |
777 | continue; /* keep looking for snap */ | 771 | continue; /* keep looking for snap */ |
@@ -913,12 +907,19 @@ static int context_is_writeable_or_written(struct inode *inode, | |||
913 | struct ceph_snap_context *snapc) | 907 | struct ceph_snap_context *snapc) |
914 | { | 908 | { |
915 | struct ceph_snap_context *oldest = get_oldest_context(inode, NULL); | 909 | struct ceph_snap_context *oldest = get_oldest_context(inode, NULL); |
916 | return !oldest || snapc->seq <= oldest->seq; | 910 | int ret = !oldest || snapc->seq <= oldest->seq; |
911 | |||
912 | ceph_put_snap_context(oldest); | ||
913 | return ret; | ||
917 | } | 914 | } |
918 | 915 | ||
919 | /* | 916 | /* |
920 | * We are only allowed to write into/dirty the page if the page is | 917 | * We are only allowed to write into/dirty the page if the page is |
921 | * clean, or already dirty within the same snap context. | 918 | * clean, or already dirty within the same snap context. |
919 | * | ||
920 | * called with page locked. | ||
921 | * return success with page locked, | ||
922 | * or any failure (incl -EAGAIN) with page unlocked. | ||
922 | */ | 923 | */ |
923 | static int ceph_update_writeable_page(struct file *file, | 924 | static int ceph_update_writeable_page(struct file *file, |
924 | loff_t pos, unsigned len, | 925 | loff_t pos, unsigned len, |
@@ -931,8 +932,8 @@ static int ceph_update_writeable_page(struct file *file, | |||
931 | int pos_in_page = pos & ~PAGE_CACHE_MASK; | 932 | int pos_in_page = pos & ~PAGE_CACHE_MASK; |
932 | int end_in_page = pos_in_page + len; | 933 | int end_in_page = pos_in_page + len; |
933 | loff_t i_size; | 934 | loff_t i_size; |
934 | struct ceph_snap_context *snapc; | ||
935 | int r; | 935 | int r; |
936 | struct ceph_snap_context *snapc, *oldest; | ||
936 | 937 | ||
937 | retry_locked: | 938 | retry_locked: |
938 | /* writepages currently holds page lock, but if we change that later, */ | 939 | /* writepages currently holds page lock, but if we change that later, */ |
@@ -942,30 +943,34 @@ retry_locked: | |||
942 | BUG_ON(!ci->i_snap_realm); | 943 | BUG_ON(!ci->i_snap_realm); |
943 | down_read(&mdsc->snap_rwsem); | 944 | down_read(&mdsc->snap_rwsem); |
944 | BUG_ON(!ci->i_snap_realm->cached_context); | 945 | BUG_ON(!ci->i_snap_realm->cached_context); |
945 | if (page->private && | 946 | snapc = (void *)page->private; |
946 | (void *)page->private != ci->i_snap_realm->cached_context) { | 947 | if (snapc && snapc != ci->i_head_snapc) { |
947 | /* | 948 | /* |
948 | * this page is already dirty in another (older) snap | 949 | * this page is already dirty in another (older) snap |
949 | * context! is it writeable now? | 950 | * context! is it writeable now? |
950 | */ | 951 | */ |
951 | snapc = get_oldest_context(inode, NULL); | 952 | oldest = get_oldest_context(inode, NULL); |
952 | up_read(&mdsc->snap_rwsem); | 953 | up_read(&mdsc->snap_rwsem); |
953 | 954 | ||
954 | if (snapc != (void *)page->private) { | 955 | if (snapc->seq > oldest->seq) { |
956 | ceph_put_snap_context(oldest); | ||
955 | dout(" page %p snapc %p not current or oldest\n", | 957 | dout(" page %p snapc %p not current or oldest\n", |
956 | page, (void *)page->private); | 958 | page, snapc); |
957 | /* | 959 | /* |
958 | * queue for writeback, and wait for snapc to | 960 | * queue for writeback, and wait for snapc to |
959 | * be writeable or written | 961 | * be writeable or written |
960 | */ | 962 | */ |
961 | snapc = ceph_get_snap_context((void *)page->private); | 963 | snapc = ceph_get_snap_context(snapc); |
962 | unlock_page(page); | 964 | unlock_page(page); |
963 | ceph_queue_writeback(inode); | 965 | ceph_queue_writeback(inode); |
964 | wait_event_interruptible(ci->i_cap_wq, | 966 | r = wait_event_interruptible(ci->i_cap_wq, |
965 | context_is_writeable_or_written(inode, snapc)); | 967 | context_is_writeable_or_written(inode, snapc)); |
966 | ceph_put_snap_context(snapc); | 968 | ceph_put_snap_context(snapc); |
969 | if (r == -ERESTARTSYS) | ||
970 | return r; | ||
967 | return -EAGAIN; | 971 | return -EAGAIN; |
968 | } | 972 | } |
973 | ceph_put_snap_context(oldest); | ||
969 | 974 | ||
970 | /* yay, writeable, do it now (without dropping page lock) */ | 975 | /* yay, writeable, do it now (without dropping page lock) */ |
971 | dout(" page %p snapc %p not current, but oldest\n", | 976 | dout(" page %p snapc %p not current, but oldest\n", |
@@ -1035,7 +1040,7 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping, | |||
1035 | int r; | 1040 | int r; |
1036 | 1041 | ||
1037 | do { | 1042 | do { |
1038 | /* get a page*/ | 1043 | /* get a page */ |
1039 | page = grab_cache_page_write_begin(mapping, index, 0); | 1044 | page = grab_cache_page_write_begin(mapping, index, 0); |
1040 | if (!page) | 1045 | if (!page) |
1041 | return -ENOMEM; | 1046 | return -ENOMEM; |