aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ceph/addr.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2010-04-14 21:45:31 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2010-04-14 21:45:31 -0400
commit96e35b40c0d6206f56370f937f6f4722739eb273 (patch)
tree2c387b6e3f628484a1f4bdc964e529f89d5f5821 /fs/ceph/addr.c
parentf5c07a2d8acfc98e00d3be6298f979e5b3175953 (diff)
parenta6a5349d17f2a5c37079826f1a1474c3d08c6b53 (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client: ceph: use separate class for ceph sockets' sk_lock ceph: reserve one more caps space when doing readdir ceph: queue_cap_snap should always queue dirty context ceph: fix dentry reference leak in dcache readdir ceph: decode v5 of osdmap (pool names) [protocol change] ceph: fix ack counter reset on connection reset ceph: fix leaked inode ref due to snap metadata writeback race ceph: fix snap context reference leaks ceph: allow writeback of snapped pages older than 'oldest' snapc ceph: fix dentry rehashing on virtual .snap dir
Diffstat (limited to 'fs/ceph/addr.c')
-rw-r--r--fs/ceph/addr.c62
1 files changed, 30 insertions, 32 deletions
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index aa3cd7cc3e40..412593703d1e 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -337,16 +337,15 @@ out:
337/* 337/*
338 * Get ref for the oldest snapc for an inode with dirty data... that is, the 338 * Get ref for the oldest snapc for an inode with dirty data... that is, the
339 * only snap context we are allowed to write back. 339 * only snap context we are allowed to write back.
340 *
341 * Caller holds i_lock.
342 */ 340 */
343static struct ceph_snap_context *__get_oldest_context(struct inode *inode, 341static struct ceph_snap_context *get_oldest_context(struct inode *inode,
344 u64 *snap_size) 342 u64 *snap_size)
345{ 343{
346 struct ceph_inode_info *ci = ceph_inode(inode); 344 struct ceph_inode_info *ci = ceph_inode(inode);
347 struct ceph_snap_context *snapc = NULL; 345 struct ceph_snap_context *snapc = NULL;
348 struct ceph_cap_snap *capsnap = NULL; 346 struct ceph_cap_snap *capsnap = NULL;
349 347
348 spin_lock(&inode->i_lock);
350 list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) { 349 list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) {
351 dout(" cap_snap %p snapc %p has %d dirty pages\n", capsnap, 350 dout(" cap_snap %p snapc %p has %d dirty pages\n", capsnap,
352 capsnap->context, capsnap->dirty_pages); 351 capsnap->context, capsnap->dirty_pages);
@@ -357,21 +356,11 @@ static struct ceph_snap_context *__get_oldest_context(struct inode *inode,
357 break; 356 break;
358 } 357 }
359 } 358 }
360 if (!snapc && ci->i_snap_realm) { 359 if (!snapc && ci->i_head_snapc) {
361 snapc = ceph_get_snap_context(ci->i_snap_realm->cached_context); 360 snapc = ceph_get_snap_context(ci->i_head_snapc);
362 dout(" head snapc %p has %d dirty pages\n", 361 dout(" head snapc %p has %d dirty pages\n",
363 snapc, ci->i_wrbuffer_ref_head); 362 snapc, ci->i_wrbuffer_ref_head);
364 } 363 }
365 return snapc;
366}
367
368static struct ceph_snap_context *get_oldest_context(struct inode *inode,
369 u64 *snap_size)
370{
371 struct ceph_snap_context *snapc = NULL;
372
373 spin_lock(&inode->i_lock);
374 snapc = __get_oldest_context(inode, snap_size);
375 spin_unlock(&inode->i_lock); 364 spin_unlock(&inode->i_lock);
376 return snapc; 365 return snapc;
377} 366}
@@ -392,7 +381,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
392 int len = PAGE_CACHE_SIZE; 381 int len = PAGE_CACHE_SIZE;
393 loff_t i_size; 382 loff_t i_size;
394 int err = 0; 383 int err = 0;
395 struct ceph_snap_context *snapc; 384 struct ceph_snap_context *snapc, *oldest;
396 u64 snap_size = 0; 385 u64 snap_size = 0;
397 long writeback_stat; 386 long writeback_stat;
398 387
@@ -413,13 +402,16 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
413 dout("writepage %p page %p not dirty?\n", inode, page); 402 dout("writepage %p page %p not dirty?\n", inode, page);
414 goto out; 403 goto out;
415 } 404 }
416 if (snapc != get_oldest_context(inode, &snap_size)) { 405 oldest = get_oldest_context(inode, &snap_size);
406 if (snapc->seq > oldest->seq) {
417 dout("writepage %p page %p snapc %p not writeable - noop\n", 407 dout("writepage %p page %p snapc %p not writeable - noop\n",
418 inode, page, (void *)page->private); 408 inode, page, (void *)page->private);
419 /* we should only noop if called by kswapd */ 409 /* we should only noop if called by kswapd */
420 WARN_ON((current->flags & PF_MEMALLOC) == 0); 410 WARN_ON((current->flags & PF_MEMALLOC) == 0);
411 ceph_put_snap_context(oldest);
421 goto out; 412 goto out;
422 } 413 }
414 ceph_put_snap_context(oldest);
423 415
424 /* is this a partial page at end of file? */ 416 /* is this a partial page at end of file? */
425 if (snap_size) 417 if (snap_size)
@@ -458,7 +450,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
458 ClearPagePrivate(page); 450 ClearPagePrivate(page);
459 end_page_writeback(page); 451 end_page_writeback(page);
460 ceph_put_wrbuffer_cap_refs(ci, 1, snapc); 452 ceph_put_wrbuffer_cap_refs(ci, 1, snapc);
461 ceph_put_snap_context(snapc); 453 ceph_put_snap_context(snapc); /* page's reference */
462out: 454out:
463 return err; 455 return err;
464} 456}
@@ -558,9 +550,9 @@ static void writepages_finish(struct ceph_osd_request *req,
558 dout("inode %p skipping page %p\n", inode, page); 550 dout("inode %p skipping page %p\n", inode, page);
559 wbc->pages_skipped++; 551 wbc->pages_skipped++;
560 } 552 }
553 ceph_put_snap_context((void *)page->private);
561 page->private = 0; 554 page->private = 0;
562 ClearPagePrivate(page); 555 ClearPagePrivate(page);
563 ceph_put_snap_context(snapc);
564 dout("unlocking %d %p\n", i, page); 556 dout("unlocking %d %p\n", i, page);
565 end_page_writeback(page); 557 end_page_writeback(page);
566 558
@@ -618,7 +610,7 @@ static int ceph_writepages_start(struct address_space *mapping,
618 int range_whole = 0; 610 int range_whole = 0;
619 int should_loop = 1; 611 int should_loop = 1;
620 pgoff_t max_pages = 0, max_pages_ever = 0; 612 pgoff_t max_pages = 0, max_pages_ever = 0;
621 struct ceph_snap_context *snapc = NULL, *last_snapc = NULL; 613 struct ceph_snap_context *snapc = NULL, *last_snapc = NULL, *pgsnapc;
622 struct pagevec pvec; 614 struct pagevec pvec;
623 int done = 0; 615 int done = 0;
624 int rc = 0; 616 int rc = 0;
@@ -770,9 +762,10 @@ get_more_pages:
770 } 762 }
771 763
772 /* only if matching snap context */ 764 /* only if matching snap context */
773 if (snapc != (void *)page->private) { 765 pgsnapc = (void *)page->private;
774 dout("page snapc %p != oldest %p\n", 766 if (pgsnapc->seq > snapc->seq) {
775 (void *)page->private, snapc); 767 dout("page snapc %p %lld > oldest %p %lld\n",
768 pgsnapc, pgsnapc->seq, snapc, snapc->seq);
776 unlock_page(page); 769 unlock_page(page);
777 if (!locked_pages) 770 if (!locked_pages)
778 continue; /* keep looking for snap */ 771 continue; /* keep looking for snap */
@@ -914,7 +907,10 @@ static int context_is_writeable_or_written(struct inode *inode,
914 struct ceph_snap_context *snapc) 907 struct ceph_snap_context *snapc)
915{ 908{
916 struct ceph_snap_context *oldest = get_oldest_context(inode, NULL); 909 struct ceph_snap_context *oldest = get_oldest_context(inode, NULL);
917 return !oldest || snapc->seq <= oldest->seq; 910 int ret = !oldest || snapc->seq <= oldest->seq;
911
912 ceph_put_snap_context(oldest);
913 return ret;
918} 914}
919 915
920/* 916/*
@@ -936,8 +932,8 @@ static int ceph_update_writeable_page(struct file *file,
936 int pos_in_page = pos & ~PAGE_CACHE_MASK; 932 int pos_in_page = pos & ~PAGE_CACHE_MASK;
937 int end_in_page = pos_in_page + len; 933 int end_in_page = pos_in_page + len;
938 loff_t i_size; 934 loff_t i_size;
939 struct ceph_snap_context *snapc;
940 int r; 935 int r;
936 struct ceph_snap_context *snapc, *oldest;
941 937
942retry_locked: 938retry_locked:
943 /* writepages currently holds page lock, but if we change that later, */ 939 /* writepages currently holds page lock, but if we change that later, */
@@ -947,23 +943,24 @@ retry_locked:
947 BUG_ON(!ci->i_snap_realm); 943 BUG_ON(!ci->i_snap_realm);
948 down_read(&mdsc->snap_rwsem); 944 down_read(&mdsc->snap_rwsem);
949 BUG_ON(!ci->i_snap_realm->cached_context); 945 BUG_ON(!ci->i_snap_realm->cached_context);
950 if (page->private && 946 snapc = (void *)page->private;
951 (void *)page->private != ci->i_snap_realm->cached_context) { 947 if (snapc && snapc != ci->i_head_snapc) {
952 /* 948 /*
953 * this page is already dirty in another (older) snap 949 * this page is already dirty in another (older) snap
954 * context! is it writeable now? 950 * context! is it writeable now?
955 */ 951 */
956 snapc = get_oldest_context(inode, NULL); 952 oldest = get_oldest_context(inode, NULL);
957 up_read(&mdsc->snap_rwsem); 953 up_read(&mdsc->snap_rwsem);
958 954
959 if (snapc != (void *)page->private) { 955 if (snapc->seq > oldest->seq) {
956 ceph_put_snap_context(oldest);
960 dout(" page %p snapc %p not current or oldest\n", 957 dout(" page %p snapc %p not current or oldest\n",
961 page, (void *)page->private); 958 page, snapc);
962 /* 959 /*
963 * queue for writeback, and wait for snapc to 960 * queue for writeback, and wait for snapc to
964 * be writeable or written 961 * be writeable or written
965 */ 962 */
966 snapc = ceph_get_snap_context((void *)page->private); 963 snapc = ceph_get_snap_context(snapc);
967 unlock_page(page); 964 unlock_page(page);
968 ceph_queue_writeback(inode); 965 ceph_queue_writeback(inode);
969 r = wait_event_interruptible(ci->i_cap_wq, 966 r = wait_event_interruptible(ci->i_cap_wq,
@@ -973,6 +970,7 @@ retry_locked:
973 return r; 970 return r;
974 return -EAGAIN; 971 return -EAGAIN;
975 } 972 }
973 ceph_put_snap_context(oldest);
976 974
977 /* yay, writeable, do it now (without dropping page lock) */ 975 /* yay, writeable, do it now (without dropping page lock) */
978 dout(" page %p snapc %p not current, but oldest\n", 976 dout(" page %p snapc %p not current, but oldest\n",