diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2010-04-14 21:45:31 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2010-04-14 21:45:31 -0400 |
commit | 96e35b40c0d6206f56370f937f6f4722739eb273 (patch) | |
tree | 2c387b6e3f628484a1f4bdc964e529f89d5f5821 /fs/ceph/addr.c | |
parent | f5c07a2d8acfc98e00d3be6298f979e5b3175953 (diff) | |
parent | a6a5349d17f2a5c37079826f1a1474c3d08c6b53 (diff) |
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client:
ceph: use separate class for ceph sockets' sk_lock
ceph: reserve one more caps space when doing readdir
ceph: queue_cap_snap should always queue dirty context
ceph: fix dentry reference leak in dcache readdir
ceph: decode v5 of osdmap (pool names) [protocol change]
ceph: fix ack counter reset on connection reset
ceph: fix leaked inode ref due to snap metadata writeback race
ceph: fix snap context reference leaks
ceph: allow writeback of snapped pages older than 'oldest' snapc
ceph: fix dentry rehashing on virtual .snap dir
Diffstat (limited to 'fs/ceph/addr.c')
-rw-r--r-- | fs/ceph/addr.c | 62 |
1 files changed, 30 insertions, 32 deletions
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index aa3cd7cc3e40..412593703d1e 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c | |||
@@ -337,16 +337,15 @@ out: | |||
337 | /* | 337 | /* |
338 | * Get ref for the oldest snapc for an inode with dirty data... that is, the | 338 | * Get ref for the oldest snapc for an inode with dirty data... that is, the |
339 | * only snap context we are allowed to write back. | 339 | * only snap context we are allowed to write back. |
340 | * | ||
341 | * Caller holds i_lock. | ||
342 | */ | 340 | */ |
343 | static struct ceph_snap_context *__get_oldest_context(struct inode *inode, | 341 | static struct ceph_snap_context *get_oldest_context(struct inode *inode, |
344 | u64 *snap_size) | 342 | u64 *snap_size) |
345 | { | 343 | { |
346 | struct ceph_inode_info *ci = ceph_inode(inode); | 344 | struct ceph_inode_info *ci = ceph_inode(inode); |
347 | struct ceph_snap_context *snapc = NULL; | 345 | struct ceph_snap_context *snapc = NULL; |
348 | struct ceph_cap_snap *capsnap = NULL; | 346 | struct ceph_cap_snap *capsnap = NULL; |
349 | 347 | ||
348 | spin_lock(&inode->i_lock); | ||
350 | list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) { | 349 | list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) { |
351 | dout(" cap_snap %p snapc %p has %d dirty pages\n", capsnap, | 350 | dout(" cap_snap %p snapc %p has %d dirty pages\n", capsnap, |
352 | capsnap->context, capsnap->dirty_pages); | 351 | capsnap->context, capsnap->dirty_pages); |
@@ -357,21 +356,11 @@ static struct ceph_snap_context *__get_oldest_context(struct inode *inode, | |||
357 | break; | 356 | break; |
358 | } | 357 | } |
359 | } | 358 | } |
360 | if (!snapc && ci->i_snap_realm) { | 359 | if (!snapc && ci->i_head_snapc) { |
361 | snapc = ceph_get_snap_context(ci->i_snap_realm->cached_context); | 360 | snapc = ceph_get_snap_context(ci->i_head_snapc); |
362 | dout(" head snapc %p has %d dirty pages\n", | 361 | dout(" head snapc %p has %d dirty pages\n", |
363 | snapc, ci->i_wrbuffer_ref_head); | 362 | snapc, ci->i_wrbuffer_ref_head); |
364 | } | 363 | } |
365 | return snapc; | ||
366 | } | ||
367 | |||
368 | static struct ceph_snap_context *get_oldest_context(struct inode *inode, | ||
369 | u64 *snap_size) | ||
370 | { | ||
371 | struct ceph_snap_context *snapc = NULL; | ||
372 | |||
373 | spin_lock(&inode->i_lock); | ||
374 | snapc = __get_oldest_context(inode, snap_size); | ||
375 | spin_unlock(&inode->i_lock); | 364 | spin_unlock(&inode->i_lock); |
376 | return snapc; | 365 | return snapc; |
377 | } | 366 | } |
@@ -392,7 +381,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) | |||
392 | int len = PAGE_CACHE_SIZE; | 381 | int len = PAGE_CACHE_SIZE; |
393 | loff_t i_size; | 382 | loff_t i_size; |
394 | int err = 0; | 383 | int err = 0; |
395 | struct ceph_snap_context *snapc; | 384 | struct ceph_snap_context *snapc, *oldest; |
396 | u64 snap_size = 0; | 385 | u64 snap_size = 0; |
397 | long writeback_stat; | 386 | long writeback_stat; |
398 | 387 | ||
@@ -413,13 +402,16 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) | |||
413 | dout("writepage %p page %p not dirty?\n", inode, page); | 402 | dout("writepage %p page %p not dirty?\n", inode, page); |
414 | goto out; | 403 | goto out; |
415 | } | 404 | } |
416 | if (snapc != get_oldest_context(inode, &snap_size)) { | 405 | oldest = get_oldest_context(inode, &snap_size); |
406 | if (snapc->seq > oldest->seq) { | ||
417 | dout("writepage %p page %p snapc %p not writeable - noop\n", | 407 | dout("writepage %p page %p snapc %p not writeable - noop\n", |
418 | inode, page, (void *)page->private); | 408 | inode, page, (void *)page->private); |
419 | /* we should only noop if called by kswapd */ | 409 | /* we should only noop if called by kswapd */ |
420 | WARN_ON((current->flags & PF_MEMALLOC) == 0); | 410 | WARN_ON((current->flags & PF_MEMALLOC) == 0); |
411 | ceph_put_snap_context(oldest); | ||
421 | goto out; | 412 | goto out; |
422 | } | 413 | } |
414 | ceph_put_snap_context(oldest); | ||
423 | 415 | ||
424 | /* is this a partial page at end of file? */ | 416 | /* is this a partial page at end of file? */ |
425 | if (snap_size) | 417 | if (snap_size) |
@@ -458,7 +450,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) | |||
458 | ClearPagePrivate(page); | 450 | ClearPagePrivate(page); |
459 | end_page_writeback(page); | 451 | end_page_writeback(page); |
460 | ceph_put_wrbuffer_cap_refs(ci, 1, snapc); | 452 | ceph_put_wrbuffer_cap_refs(ci, 1, snapc); |
461 | ceph_put_snap_context(snapc); | 453 | ceph_put_snap_context(snapc); /* page's reference */ |
462 | out: | 454 | out: |
463 | return err; | 455 | return err; |
464 | } | 456 | } |
@@ -558,9 +550,9 @@ static void writepages_finish(struct ceph_osd_request *req, | |||
558 | dout("inode %p skipping page %p\n", inode, page); | 550 | dout("inode %p skipping page %p\n", inode, page); |
559 | wbc->pages_skipped++; | 551 | wbc->pages_skipped++; |
560 | } | 552 | } |
553 | ceph_put_snap_context((void *)page->private); | ||
561 | page->private = 0; | 554 | page->private = 0; |
562 | ClearPagePrivate(page); | 555 | ClearPagePrivate(page); |
563 | ceph_put_snap_context(snapc); | ||
564 | dout("unlocking %d %p\n", i, page); | 556 | dout("unlocking %d %p\n", i, page); |
565 | end_page_writeback(page); | 557 | end_page_writeback(page); |
566 | 558 | ||
@@ -618,7 +610,7 @@ static int ceph_writepages_start(struct address_space *mapping, | |||
618 | int range_whole = 0; | 610 | int range_whole = 0; |
619 | int should_loop = 1; | 611 | int should_loop = 1; |
620 | pgoff_t max_pages = 0, max_pages_ever = 0; | 612 | pgoff_t max_pages = 0, max_pages_ever = 0; |
621 | struct ceph_snap_context *snapc = NULL, *last_snapc = NULL; | 613 | struct ceph_snap_context *snapc = NULL, *last_snapc = NULL, *pgsnapc; |
622 | struct pagevec pvec; | 614 | struct pagevec pvec; |
623 | int done = 0; | 615 | int done = 0; |
624 | int rc = 0; | 616 | int rc = 0; |
@@ -770,9 +762,10 @@ get_more_pages: | |||
770 | } | 762 | } |
771 | 763 | ||
772 | /* only if matching snap context */ | 764 | /* only if matching snap context */ |
773 | if (snapc != (void *)page->private) { | 765 | pgsnapc = (void *)page->private; |
774 | dout("page snapc %p != oldest %p\n", | 766 | if (pgsnapc->seq > snapc->seq) { |
775 | (void *)page->private, snapc); | 767 | dout("page snapc %p %lld > oldest %p %lld\n", |
768 | pgsnapc, pgsnapc->seq, snapc, snapc->seq); | ||
776 | unlock_page(page); | 769 | unlock_page(page); |
777 | if (!locked_pages) | 770 | if (!locked_pages) |
778 | continue; /* keep looking for snap */ | 771 | continue; /* keep looking for snap */ |
@@ -914,7 +907,10 @@ static int context_is_writeable_or_written(struct inode *inode, | |||
914 | struct ceph_snap_context *snapc) | 907 | struct ceph_snap_context *snapc) |
915 | { | 908 | { |
916 | struct ceph_snap_context *oldest = get_oldest_context(inode, NULL); | 909 | struct ceph_snap_context *oldest = get_oldest_context(inode, NULL); |
917 | return !oldest || snapc->seq <= oldest->seq; | 910 | int ret = !oldest || snapc->seq <= oldest->seq; |
911 | |||
912 | ceph_put_snap_context(oldest); | ||
913 | return ret; | ||
918 | } | 914 | } |
919 | 915 | ||
920 | /* | 916 | /* |
@@ -936,8 +932,8 @@ static int ceph_update_writeable_page(struct file *file, | |||
936 | int pos_in_page = pos & ~PAGE_CACHE_MASK; | 932 | int pos_in_page = pos & ~PAGE_CACHE_MASK; |
937 | int end_in_page = pos_in_page + len; | 933 | int end_in_page = pos_in_page + len; |
938 | loff_t i_size; | 934 | loff_t i_size; |
939 | struct ceph_snap_context *snapc; | ||
940 | int r; | 935 | int r; |
936 | struct ceph_snap_context *snapc, *oldest; | ||
941 | 937 | ||
942 | retry_locked: | 938 | retry_locked: |
943 | /* writepages currently holds page lock, but if we change that later, */ | 939 | /* writepages currently holds page lock, but if we change that later, */ |
@@ -947,23 +943,24 @@ retry_locked: | |||
947 | BUG_ON(!ci->i_snap_realm); | 943 | BUG_ON(!ci->i_snap_realm); |
948 | down_read(&mdsc->snap_rwsem); | 944 | down_read(&mdsc->snap_rwsem); |
949 | BUG_ON(!ci->i_snap_realm->cached_context); | 945 | BUG_ON(!ci->i_snap_realm->cached_context); |
950 | if (page->private && | 946 | snapc = (void *)page->private; |
951 | (void *)page->private != ci->i_snap_realm->cached_context) { | 947 | if (snapc && snapc != ci->i_head_snapc) { |
952 | /* | 948 | /* |
953 | * this page is already dirty in another (older) snap | 949 | * this page is already dirty in another (older) snap |
954 | * context! is it writeable now? | 950 | * context! is it writeable now? |
955 | */ | 951 | */ |
956 | snapc = get_oldest_context(inode, NULL); | 952 | oldest = get_oldest_context(inode, NULL); |
957 | up_read(&mdsc->snap_rwsem); | 953 | up_read(&mdsc->snap_rwsem); |
958 | 954 | ||
959 | if (snapc != (void *)page->private) { | 955 | if (snapc->seq > oldest->seq) { |
956 | ceph_put_snap_context(oldest); | ||
960 | dout(" page %p snapc %p not current or oldest\n", | 957 | dout(" page %p snapc %p not current or oldest\n", |
961 | page, (void *)page->private); | 958 | page, snapc); |
962 | /* | 959 | /* |
963 | * queue for writeback, and wait for snapc to | 960 | * queue for writeback, and wait for snapc to |
964 | * be writeable or written | 961 | * be writeable or written |
965 | */ | 962 | */ |
966 | snapc = ceph_get_snap_context((void *)page->private); | 963 | snapc = ceph_get_snap_context(snapc); |
967 | unlock_page(page); | 964 | unlock_page(page); |
968 | ceph_queue_writeback(inode); | 965 | ceph_queue_writeback(inode); |
969 | r = wait_event_interruptible(ci->i_cap_wq, | 966 | r = wait_event_interruptible(ci->i_cap_wq, |
@@ -973,6 +970,7 @@ retry_locked: | |||
973 | return r; | 970 | return r; |
974 | return -EAGAIN; | 971 | return -EAGAIN; |
975 | } | 972 | } |
973 | ceph_put_snap_context(oldest); | ||
976 | 974 | ||
977 | /* yay, writeable, do it now (without dropping page lock) */ | 975 | /* yay, writeable, do it now (without dropping page lock) */ |
978 | dout(" page %p snapc %p not current, but oldest\n", | 976 | dout(" page %p snapc %p not current, but oldest\n", |