Diffstat (limited to 'fs')
 fs/ceph/addr.c              |  62
 fs/ceph/caps.c              |  42
 fs/ceph/dir.c               |   7
 fs/ceph/inode.c             |  10
 fs/ceph/messenger.c         |   9
 fs/ceph/osdmap.c            | 180
 fs/ceph/osdmap.h            |   1
 fs/ceph/rados.h             |   6
 fs/ceph/snap.c              |  26
 fs/ceph/super.h             |   3
 fs/xfs/linux-2.6/xfs_sync.c |   4
 fs/xfs/xfs_log.c            |  38
 12 files changed, 240 insertions(+), 148 deletions(-)
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index aa3cd7cc3e40..412593703d1e 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -337,16 +337,15 @@ out:
 /*
  * Get ref for the oldest snapc for an inode with dirty data... that is, the
  * only snap context we are allowed to write back.
- *
- * Caller holds i_lock.
  */
-static struct ceph_snap_context *__get_oldest_context(struct inode *inode,
-                                                       u64 *snap_size)
+static struct ceph_snap_context *get_oldest_context(struct inode *inode,
+                                                    u64 *snap_size)
 {
         struct ceph_inode_info *ci = ceph_inode(inode);
         struct ceph_snap_context *snapc = NULL;
         struct ceph_cap_snap *capsnap = NULL;
 
+        spin_lock(&inode->i_lock);
         list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) {
                 dout(" cap_snap %p snapc %p has %d dirty pages\n", capsnap,
                      capsnap->context, capsnap->dirty_pages);
@@ -357,21 +356,11 @@ static struct ceph_snap_context *__get_oldest_context(struct inode *inode,
                         break;
                 }
         }
-        if (!snapc && ci->i_snap_realm) {
-                snapc = ceph_get_snap_context(ci->i_snap_realm->cached_context);
+        if (!snapc && ci->i_head_snapc) {
+                snapc = ceph_get_snap_context(ci->i_head_snapc);
                 dout(" head snapc %p has %d dirty pages\n",
                      snapc, ci->i_wrbuffer_ref_head);
         }
-        return snapc;
-}
-
-static struct ceph_snap_context *get_oldest_context(struct inode *inode,
-                                                    u64 *snap_size)
-{
-        struct ceph_snap_context *snapc = NULL;
-
-        spin_lock(&inode->i_lock);
-        snapc = __get_oldest_context(inode, snap_size);
         spin_unlock(&inode->i_lock);
         return snapc;
 }
@@ -392,7 +381,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
         int len = PAGE_CACHE_SIZE;
         loff_t i_size;
         int err = 0;
-        struct ceph_snap_context *snapc;
+        struct ceph_snap_context *snapc, *oldest;
         u64 snap_size = 0;
         long writeback_stat;
 
@@ -413,13 +402,16 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
                 dout("writepage %p page %p not dirty?\n", inode, page);
                 goto out;
         }
-        if (snapc != get_oldest_context(inode, &snap_size)) {
+        oldest = get_oldest_context(inode, &snap_size);
+        if (snapc->seq > oldest->seq) {
                 dout("writepage %p page %p snapc %p not writeable - noop\n",
                      inode, page, (void *)page->private);
                 /* we should only noop if called by kswapd */
                 WARN_ON((current->flags & PF_MEMALLOC) == 0);
+                ceph_put_snap_context(oldest);
                 goto out;
         }
+        ceph_put_snap_context(oldest);
 
         /* is this a partial page at end of file? */
         if (snap_size)
@@ -458,7 +450,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
         ClearPagePrivate(page);
         end_page_writeback(page);
         ceph_put_wrbuffer_cap_refs(ci, 1, snapc);
-        ceph_put_snap_context(snapc);
+        ceph_put_snap_context(snapc);  /* page's reference */
 out:
         return err;
 }
@@ -558,9 +550,9 @@ static void writepages_finish(struct ceph_osd_request *req,
                         dout("inode %p skipping page %p\n", inode, page);
                         wbc->pages_skipped++;
                 }
+                ceph_put_snap_context((void *)page->private);
                 page->private = 0;
                 ClearPagePrivate(page);
-                ceph_put_snap_context(snapc);
                 dout("unlocking %d %p\n", i, page);
                 end_page_writeback(page);
 
@@ -618,7 +610,7 @@ static int ceph_writepages_start(struct address_space *mapping,
         int range_whole = 0;
         int should_loop = 1;
         pgoff_t max_pages = 0, max_pages_ever = 0;
-        struct ceph_snap_context *snapc = NULL, *last_snapc = NULL;
+        struct ceph_snap_context *snapc = NULL, *last_snapc = NULL, *pgsnapc;
         struct pagevec pvec;
         int done = 0;
         int rc = 0;
@@ -770,9 +762,10 @@ get_more_pages:
                         }
 
                         /* only if matching snap context */
-                        if (snapc != (void *)page->private) {
-                                dout("page snapc %p != oldest %p\n",
-                                     (void *)page->private, snapc);
+                        pgsnapc = (void *)page->private;
+                        if (pgsnapc->seq > snapc->seq) {
+                                dout("page snapc %p %lld > oldest %p %lld\n",
+                                     pgsnapc, pgsnapc->seq, snapc, snapc->seq);
                                 unlock_page(page);
                                 if (!locked_pages)
                                         continue; /* keep looking for snap */
@@ -914,7 +907,10 @@ static int context_is_writeable_or_written(struct inode *inode,
                                            struct ceph_snap_context *snapc)
 {
         struct ceph_snap_context *oldest = get_oldest_context(inode, NULL);
-        return !oldest || snapc->seq <= oldest->seq;
+        int ret = !oldest || snapc->seq <= oldest->seq;
+
+        ceph_put_snap_context(oldest);
+        return ret;
 }
 
 /*
@@ -936,8 +932,8 @@ static int ceph_update_writeable_page(struct file *file,
         int pos_in_page = pos & ~PAGE_CACHE_MASK;
         int end_in_page = pos_in_page + len;
         loff_t i_size;
-        struct ceph_snap_context *snapc;
         int r;
+        struct ceph_snap_context *snapc, *oldest;
 
 retry_locked:
         /* writepages currently holds page lock, but if we change that later, */
@@ -947,23 +943,24 @@ retry_locked:
         BUG_ON(!ci->i_snap_realm);
         down_read(&mdsc->snap_rwsem);
         BUG_ON(!ci->i_snap_realm->cached_context);
-        if (page->private &&
-            (void *)page->private != ci->i_snap_realm->cached_context) {
+        snapc = (void *)page->private;
+        if (snapc && snapc != ci->i_head_snapc) {
                 /*
                  * this page is already dirty in another (older) snap
                  * context! is it writeable now?
                  */
-                snapc = get_oldest_context(inode, NULL);
+                oldest = get_oldest_context(inode, NULL);
                 up_read(&mdsc->snap_rwsem);
 
-                if (snapc != (void *)page->private) {
+                if (snapc->seq > oldest->seq) {
+                        ceph_put_snap_context(oldest);
                         dout(" page %p snapc %p not current or oldest\n",
-                             page, (void *)page->private);
+                             page, snapc);
                         /*
                          * queue for writeback, and wait for snapc to
                          * be writeable or written
                          */
-                        snapc = ceph_get_snap_context((void *)page->private);
+                        snapc = ceph_get_snap_context(snapc);
                         unlock_page(page);
                         ceph_queue_writeback(inode);
                         r = wait_event_interruptible(ci->i_cap_wq,
@@ -973,6 +970,7 @@ retry_locked:
                                 return r;
                         return -EAGAIN;
                 }
+                ceph_put_snap_context(oldest);
 
                 /* yay, writeable, do it now (without dropping page lock) */
                 dout(" page %p snapc %p not current, but oldest\n",
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 3710e077a857..aa2239fa9a3b 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1205,6 +1205,12 @@ retry:
                 if (capsnap->dirty_pages || capsnap->writing)
                         continue;
 
+                /*
+                 * if cap writeback already occurred, we should have dropped
+                 * the capsnap in ceph_put_wrbuffer_cap_refs.
+                 */
+                BUG_ON(capsnap->dirty == 0);
+
                 /* pick mds, take s_mutex */
                 mds = __ceph_get_cap_mds(ci, &mseq);
                 if (session && session->s_mds != mds) {
@@ -2118,8 +2124,8 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had)
         }
         spin_unlock(&inode->i_lock);
 
-        dout("put_cap_refs %p had %s %s\n", inode, ceph_cap_string(had),
-             last ? "last" : "");
+        dout("put_cap_refs %p had %s%s%s\n", inode, ceph_cap_string(had),
+             last ? " last" : "", put ? " put" : "");
 
         if (last && !flushsnaps)
                 ceph_check_caps(ci, 0, NULL);
@@ -2143,7 +2149,8 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
 {
         struct inode *inode = &ci->vfs_inode;
         int last = 0;
-        int last_snap = 0;
+        int complete_capsnap = 0;
+        int drop_capsnap = 0;
         int found = 0;
         struct ceph_cap_snap *capsnap = NULL;
 
@@ -2166,19 +2173,32 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
                 list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) {
                         if (capsnap->context == snapc) {
                                 found = 1;
-                                capsnap->dirty_pages -= nr;
-                                last_snap = !capsnap->dirty_pages;
                                 break;
                         }
                 }
                 BUG_ON(!found);
+                capsnap->dirty_pages -= nr;
+                if (capsnap->dirty_pages == 0) {
+                        complete_capsnap = 1;
+                        if (capsnap->dirty == 0)
+                                /* cap writeback completed before we created
+                                 * the cap_snap; no FLUSHSNAP is needed */
+                                drop_capsnap = 1;
+                }
                 dout("put_wrbuffer_cap_refs on %p cap_snap %p "
-                     " snap %lld %d/%d -> %d/%d %s%s\n",
+                     " snap %lld %d/%d -> %d/%d %s%s%s\n",
                      inode, capsnap, capsnap->context->seq,
                      ci->i_wrbuffer_ref+nr, capsnap->dirty_pages + nr,
                      ci->i_wrbuffer_ref, capsnap->dirty_pages,
                      last ? " (wrbuffer last)" : "",
-                     last_snap ? " (capsnap last)" : "");
+                     complete_capsnap ? " (complete capsnap)" : "",
+                     drop_capsnap ? " (drop capsnap)" : "");
+                if (drop_capsnap) {
+                        ceph_put_snap_context(capsnap->context);
+                        list_del(&capsnap->ci_item);
+                        list_del(&capsnap->flushing_item);
+                        ceph_put_cap_snap(capsnap);
+                }
         }
 
         spin_unlock(&inode->i_lock);
@@ -2186,10 +2206,12 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
         if (last) {
                 ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL);
                 iput(inode);
-        } else if (last_snap) {
+        } else if (complete_capsnap) {
                 ceph_flush_snaps(ci);
                 wake_up(&ci->i_cap_wq);
         }
+        if (drop_capsnap)
+                iput(inode);
 }
 
 /*
@@ -2465,8 +2487,8 @@ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid,
                                 break;
                         }
                         WARN_ON(capsnap->dirty_pages || capsnap->writing);
-                        dout(" removing cap_snap %p follows %lld\n",
-                             capsnap, follows);
+                        dout(" removing %p cap_snap %p follows %lld\n",
+                             inode, capsnap, follows);
                         ceph_put_snap_context(capsnap->context);
                         list_del(&capsnap->ci_item);
                         list_del(&capsnap->flushing_item);
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 7261dc6c2ead..ea8ee2e526aa 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -171,11 +171,11 @@ more:
         spin_lock(&inode->i_lock);
         spin_lock(&dcache_lock);
 
+        last = dentry;
+
         if (err < 0)
                 goto out_unlock;
 
-        last = dentry;
-
         p = p->prev;
         filp->f_pos++;
 
@@ -312,7 +312,7 @@ more:
                 req->r_readdir_offset = fi->next_offset;
                 req->r_args.readdir.frag = cpu_to_le32(frag);
                 req->r_args.readdir.max_entries = cpu_to_le32(max_entries);
-                req->r_num_caps = max_entries;
+                req->r_num_caps = max_entries + 1;
                 err = ceph_mdsc_do_request(mdsc, NULL, req);
                 if (err < 0) {
                         ceph_mdsc_put_request(req);
@@ -489,6 +489,7 @@ struct dentry *ceph_finish_lookup(struct ceph_mds_request *req,
                 struct inode *inode = ceph_get_snapdir(parent);
                 dout("ENOENT on snapdir %p '%.*s', linking to snapdir %p\n",
                      dentry, dentry->d_name.len, dentry->d_name.name, inode);
+                BUG_ON(!d_unhashed(dentry));
                 d_add(dentry, inode);
                 err = 0;
         }
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index aca82d55cc53..26f883c275e8 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -886,6 +886,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
         struct inode *in = NULL;
         struct ceph_mds_reply_inode *ininfo;
         struct ceph_vino vino;
+        struct ceph_client *client = ceph_sb_to_client(sb);
         int i = 0;
         int err = 0;
 
@@ -949,7 +950,14 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
                         return err;
         }
 
-        if (rinfo->head->is_dentry && !req->r_aborted) {
+        /*
+         * ignore null lease/binding on snapdir ENOENT, or else we
+         * will have trouble splicing in the virtual snapdir later
+         */
+        if (rinfo->head->is_dentry && !req->r_aborted &&
+            (rinfo->head->is_target || strncmp(req->r_dentry->d_name.name,
+                                               client->mount_args->snapdir_name,
+                                               req->r_dentry->d_name.len))) {
                 /*
                  * lookup link rename : null -> possibly existing inode
                  * mknod symlink mkdir : null -> new inode
diff --git a/fs/ceph/messenger.c b/fs/ceph/messenger.c
index 8f1715ffbe4b..cdaaa131add3 100644
--- a/fs/ceph/messenger.c
+++ b/fs/ceph/messenger.c
@@ -30,6 +30,10 @@ static char tag_msg = CEPH_MSGR_TAG_MSG;
 static char tag_ack = CEPH_MSGR_TAG_ACK;
 static char tag_keepalive = CEPH_MSGR_TAG_KEEPALIVE;
 
+#ifdef CONFIG_LOCKDEP
+static struct lock_class_key socket_class;
+#endif
+
 
 static void queue_con(struct ceph_connection *con);
 static void con_work(struct work_struct *);
@@ -228,6 +232,10 @@ static struct socket *ceph_tcp_connect(struct ceph_connection *con)
         con->sock = sock;
         sock->sk->sk_allocation = GFP_NOFS;
 
+#ifdef CONFIG_LOCKDEP
+        lockdep_set_class(&sock->sk->sk_lock, &socket_class);
+#endif
+
         set_sock_callbacks(sock, con);
 
         dout("connect %s\n", pr_addr(&con->peer_addr.in_addr));
@@ -333,6 +341,7 @@ static void reset_connection(struct ceph_connection *con)
                 con->out_msg = NULL;
         }
         con->in_seq = 0;
+        con->in_seq_acked = 0;
 }
 
 /*
diff --git a/fs/ceph/osdmap.c b/fs/ceph/osdmap.c
index 21c6623c4b07..2e2c15eed82a 100644
--- a/fs/ceph/osdmap.c
+++ b/fs/ceph/osdmap.c
@@ -314,71 +314,6 @@ bad:
         return ERR_PTR(err);
 }
 
-
-/*
- * osd map
- */
-void ceph_osdmap_destroy(struct ceph_osdmap *map)
-{
-        dout("osdmap_destroy %p\n", map);
-        if (map->crush)
-                crush_destroy(map->crush);
-        while (!RB_EMPTY_ROOT(&map->pg_temp)) {
-                struct ceph_pg_mapping *pg =
-                        rb_entry(rb_first(&map->pg_temp),
-                                 struct ceph_pg_mapping, node);
-                rb_erase(&pg->node, &map->pg_temp);
-                kfree(pg);
-        }
-        while (!RB_EMPTY_ROOT(&map->pg_pools)) {
-                struct ceph_pg_pool_info *pi =
-                        rb_entry(rb_first(&map->pg_pools),
-                                 struct ceph_pg_pool_info, node);
-                rb_erase(&pi->node, &map->pg_pools);
-                kfree(pi);
-        }
-        kfree(map->osd_state);
-        kfree(map->osd_weight);
-        kfree(map->osd_addr);
-        kfree(map);
-}
-
-/*
- * adjust max osd value. reallocate arrays.
- */
-static int osdmap_set_max_osd(struct ceph_osdmap *map, int max)
-{
-        u8 *state;
-        struct ceph_entity_addr *addr;
-        u32 *weight;
-
-        state = kcalloc(max, sizeof(*state), GFP_NOFS);
-        addr = kcalloc(max, sizeof(*addr), GFP_NOFS);
-        weight = kcalloc(max, sizeof(*weight), GFP_NOFS);
-        if (state == NULL || addr == NULL || weight == NULL) {
-                kfree(state);
-                kfree(addr);
-                kfree(weight);
-                return -ENOMEM;
-        }
-
-        /* copy old? */
-        if (map->osd_state) {
-                memcpy(state, map->osd_state, map->max_osd*sizeof(*state));
-                memcpy(addr, map->osd_addr, map->max_osd*sizeof(*addr));
-                memcpy(weight, map->osd_weight, map->max_osd*sizeof(*weight));
-                kfree(map->osd_state);
-                kfree(map->osd_addr);
-                kfree(map->osd_weight);
-        }
-
-        map->osd_state = state;
-        map->osd_weight = weight;
-        map->osd_addr = addr;
-        map->max_osd = max;
-        return 0;
-}
-
 /*
  * rbtree of pg_mapping for handling pg_temp (explicit mapping of pgid
  * to a set of osds)
@@ -482,6 +417,13 @@ static struct ceph_pg_pool_info *__lookup_pg_pool(struct rb_root *root, int id)
         return NULL;
 }
 
+static void __remove_pg_pool(struct rb_root *root, struct ceph_pg_pool_info *pi)
+{
+        rb_erase(&pi->node, root);
+        kfree(pi->name);
+        kfree(pi);
+}
+
 void __decode_pool(void **p, struct ceph_pg_pool_info *pi)
 {
         ceph_decode_copy(p, &pi->v, sizeof(pi->v));
@@ -490,6 +432,98 @@ void __decode_pool(void **p, struct ceph_pg_pool_info *pi)
         *p += le32_to_cpu(pi->v.num_removed_snap_intervals) * sizeof(u64) * 2;
 }
 
+static int __decode_pool_names(void **p, void *end, struct ceph_osdmap *map)
+{
+        struct ceph_pg_pool_info *pi;
+        u32 num, len, pool;
+
+        ceph_decode_32_safe(p, end, num, bad);
+        dout(" %d pool names\n", num);
+        while (num--) {
+                ceph_decode_32_safe(p, end, pool, bad);
+                ceph_decode_32_safe(p, end, len, bad);
+                dout(" pool %d len %d\n", pool, len);
+                pi = __lookup_pg_pool(&map->pg_pools, pool);
+                if (pi) {
+                        kfree(pi->name);
+                        pi->name = kmalloc(len + 1, GFP_NOFS);
+                        if (pi->name) {
+                                memcpy(pi->name, *p, len);
+                                pi->name[len] = '\0';
+                                dout(" name is %s\n", pi->name);
+                        }
+                }
+                *p += len;
+        }
+        return 0;
+
+bad:
+        return -EINVAL;
+}
+
+/*
+ * osd map
+ */
+void ceph_osdmap_destroy(struct ceph_osdmap *map)
+{
+        dout("osdmap_destroy %p\n", map);
+        if (map->crush)
+                crush_destroy(map->crush);
+        while (!RB_EMPTY_ROOT(&map->pg_temp)) {
+                struct ceph_pg_mapping *pg =
+                        rb_entry(rb_first(&map->pg_temp),
+                                 struct ceph_pg_mapping, node);
+                rb_erase(&pg->node, &map->pg_temp);
+                kfree(pg);
+        }
+        while (!RB_EMPTY_ROOT(&map->pg_pools)) {
+                struct ceph_pg_pool_info *pi =
+                        rb_entry(rb_first(&map->pg_pools),
+                                 struct ceph_pg_pool_info, node);
+                __remove_pg_pool(&map->pg_pools, pi);
+        }
+        kfree(map->osd_state);
+        kfree(map->osd_weight);
+        kfree(map->osd_addr);
+        kfree(map);
+}
+
+/*
+ * adjust max osd value. reallocate arrays.
+ */
+static int osdmap_set_max_osd(struct ceph_osdmap *map, int max)
+{
+        u8 *state;
+        struct ceph_entity_addr *addr;
+        u32 *weight;
+
+        state = kcalloc(max, sizeof(*state), GFP_NOFS);
+        addr = kcalloc(max, sizeof(*addr), GFP_NOFS);
+        weight = kcalloc(max, sizeof(*weight), GFP_NOFS);
+        if (state == NULL || addr == NULL || weight == NULL) {
+                kfree(state);
+                kfree(addr);
+                kfree(weight);
+                return -ENOMEM;
+        }
+
+        /* copy old? */
+        if (map->osd_state) {
+                memcpy(state, map->osd_state, map->max_osd*sizeof(*state));
+                memcpy(addr, map->osd_addr, map->max_osd*sizeof(*addr));
+                memcpy(weight, map->osd_weight, map->max_osd*sizeof(*weight));
+                kfree(map->osd_state);
+                kfree(map->osd_addr);
+                kfree(map->osd_weight);
+        }
+
+        map->osd_state = state;
+        map->osd_weight = weight;
+        map->osd_addr = addr;
+        map->max_osd = max;
+        return 0;
+}
+
 /*
  * decode a full map.
  */
@@ -526,7 +560,7 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end)
         ceph_decode_32_safe(p, end, max, bad);
         while (max--) {
                 ceph_decode_need(p, end, 4 + 1 + sizeof(pi->v), bad);
-                pi = kmalloc(sizeof(*pi), GFP_NOFS);
+                pi = kzalloc(sizeof(*pi), GFP_NOFS);
                 if (!pi)
                         goto bad;
                 pi->id = ceph_decode_32(p);
@@ -539,6 +573,10 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end)
                 __decode_pool(p, pi);
                 __insert_pg_pool(&map->pg_pools, pi);
         }
+
+        if (version >= 5 && __decode_pool_names(p, end, map) < 0)
+                goto bad;
+
         ceph_decode_32_safe(p, end, map->pool_max, bad);
 
         ceph_decode_32_safe(p, end, map->flags, bad);
@@ -712,7 +750,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
                 }
                 pi = __lookup_pg_pool(&map->pg_pools, pool);
                 if (!pi) {
-                        pi = kmalloc(sizeof(*pi), GFP_NOFS);
+                        pi = kzalloc(sizeof(*pi), GFP_NOFS);
                         if (!pi) {
                                 err = -ENOMEM;
                                 goto bad;
@@ -722,6 +760,8 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
                 }
                 __decode_pool(p, pi);
         }
+        if (version >= 5 && __decode_pool_names(p, end, map) < 0)
+                goto bad;
 
         /* old_pool */
         ceph_decode_32_safe(p, end, len, bad);
@@ -730,10 +770,8 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
 
                 ceph_decode_32_safe(p, end, pool, bad);
                 pi = __lookup_pg_pool(&map->pg_pools, pool);
-                if (pi) {
-                        rb_erase(&pi->node, &map->pg_pools);
-                        kfree(pi);
-                }
+                if (pi)
+                        __remove_pg_pool(&map->pg_pools, pi);
         }
 
         /* new_up */
diff --git a/fs/ceph/osdmap.h b/fs/ceph/osdmap.h
index 1fb55afb2642..8bc9f1e4f562 100644
--- a/fs/ceph/osdmap.h
+++ b/fs/ceph/osdmap.h
@@ -23,6 +23,7 @@ struct ceph_pg_pool_info {
         int id;
         struct ceph_pg_pool v;
         int pg_num_mask, pgp_num_mask, lpg_num_mask, lpgp_num_mask;
+        char *name;
 };
 
 struct ceph_pg_mapping {
diff --git a/fs/ceph/rados.h b/fs/ceph/rados.h
index 26ac8b89a676..a1fc1d017b58 100644
--- a/fs/ceph/rados.h
+++ b/fs/ceph/rados.h
@@ -11,8 +11,10 @@
 /*
  * osdmap encoding versions
  */
-#define CEPH_OSDMAP_INC_VERSION 4
-#define CEPH_OSDMAP_VERSION 4
+#define CEPH_OSDMAP_INC_VERSION 5
+#define CEPH_OSDMAP_INC_VERSION_EXT 5
+#define CEPH_OSDMAP_VERSION 5
+#define CEPH_OSDMAP_VERSION_EXT 5
 
 /*
  * fs id
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index e6f9bc57d472..2b881262ef67 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -431,8 +431,7 @@ static int dup_array(u64 **dst, __le64 *src, int num)
  * Caller must hold snap_rwsem for read (i.e., the realm topology won't
  * change).
  */
-void ceph_queue_cap_snap(struct ceph_inode_info *ci,
-                         struct ceph_snap_context *snapc)
+void ceph_queue_cap_snap(struct ceph_inode_info *ci)
 {
         struct inode *inode = &ci->vfs_inode;
         struct ceph_cap_snap *capsnap;
@@ -451,10 +450,11 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci,
                    as no new writes are allowed to start when pending, so any
                    writes in progress now were started before the previous
                    cap_snap. lucky us. */
-                dout("queue_cap_snap %p snapc %p seq %llu used %d"
-                     " already pending\n", inode, snapc, snapc->seq, used);
+                dout("queue_cap_snap %p already pending\n", inode);
                 kfree(capsnap);
         } else if (ci->i_wrbuffer_ref_head || (used & CEPH_CAP_FILE_WR)) {
+                struct ceph_snap_context *snapc = ci->i_head_snapc;
+
                 igrab(inode);
 
                 atomic_set(&capsnap->nref, 1);
@@ -463,7 +463,6 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci,
                 INIT_LIST_HEAD(&capsnap->flushing_item);
 
                 capsnap->follows = snapc->seq - 1;
-                capsnap->context = ceph_get_snap_context(snapc);
                 capsnap->issued = __ceph_caps_issued(ci, NULL);
                 capsnap->dirty = __ceph_caps_dirty(ci);
 
@@ -480,7 +479,7 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci,
                    snapshot. */
                 capsnap->dirty_pages = ci->i_wrbuffer_ref_head;
                 ci->i_wrbuffer_ref_head = 0;
-                ceph_put_snap_context(ci->i_head_snapc);
+                capsnap->context = snapc;
                 ci->i_head_snapc = NULL;
                 list_add_tail(&capsnap->ci_item, &ci->i_cap_snaps);
 
@@ -522,15 +521,17 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci,
         capsnap->ctime = inode->i_ctime;
         capsnap->time_warp_seq = ci->i_time_warp_seq;
         if (capsnap->dirty_pages) {
-                dout("finish_cap_snap %p cap_snap %p snapc %p %llu s=%llu "
+                dout("finish_cap_snap %p cap_snap %p snapc %p %llu %s s=%llu "
                      "still has %d dirty pages\n", inode, capsnap,
                      capsnap->context, capsnap->context->seq,
-                     capsnap->size, capsnap->dirty_pages);
+                     ceph_cap_string(capsnap->dirty), capsnap->size,
+                     capsnap->dirty_pages);
                 return 0;
         }
-        dout("finish_cap_snap %p cap_snap %p snapc %p %llu s=%llu clean\n",
+        dout("finish_cap_snap %p cap_snap %p snapc %p %llu %s s=%llu\n",
              inode, capsnap, capsnap->context,
-             capsnap->context->seq, capsnap->size);
+             capsnap->context->seq, ceph_cap_string(capsnap->dirty),
+             capsnap->size);
 
         spin_lock(&mdsc->snap_flush_lock);
         list_add_tail(&ci->i_snap_flush_item, &mdsc->snap_flush_list);
@@ -602,7 +603,7 @@ more:
                 if (lastinode)
                         iput(lastinode);
                 lastinode = inode;
-                ceph_queue_cap_snap(ci, realm->cached_context);
+                ceph_queue_cap_snap(ci);
                 spin_lock(&realm->inodes_with_caps_lock);
         }
         spin_unlock(&realm->inodes_with_caps_lock);
@@ -824,8 +825,7 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
                         spin_unlock(&realm->inodes_with_caps_lock);
                         spin_unlock(&inode->i_lock);
 
-                        ceph_queue_cap_snap(ci,
-                                            ci->i_snap_realm->cached_context);
+                        ceph_queue_cap_snap(ci);
 
                         iput(inode);
                         continue;
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index ca702c67bc66..e30dfbb056c3 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -715,8 +715,7 @@ extern int ceph_update_snap_trace(struct ceph_mds_client *m,
 extern void ceph_handle_snap(struct ceph_mds_client *mdsc,
                              struct ceph_mds_session *session,
                              struct ceph_msg *msg);
-extern void ceph_queue_cap_snap(struct ceph_inode_info *ci,
-                                struct ceph_snap_context *snapc);
+extern void ceph_queue_cap_snap(struct ceph_inode_info *ci);
 extern int __ceph_finish_cap_snap(struct ceph_inode_info *ci,
                                   struct ceph_cap_snap *capsnap);
 extern void ceph_cleanup_empty_realms(struct ceph_mds_client *mdsc);
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index 05cd85317f6f..fd9698215759 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -820,10 +820,10 @@ xfs_reclaim_inode(
          * call into reclaim to find it in a clean state instead of waiting for
          * it now. We also don't return errors here - if the error is transient
          * then the next reclaim pass will flush the inode, and if the error
-         * is permanent then the next sync reclaim will relcaim the inode and
+         * is permanent then the next sync reclaim will reclaim the inode and
          * pass on the error.
          */
-        if (error && !XFS_FORCED_SHUTDOWN(ip->i_mount)) {
+        if (error && error != EAGAIN && !XFS_FORCED_SHUTDOWN(ip->i_mount)) {
                 xfs_fs_cmn_err(CE_WARN, ip->i_mount,
                         "inode 0x%llx background reclaim flush failed with %d",
                         (long long)ip->i_ino, error);
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index e8fba92d7cd9..2be019136287 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -745,9 +745,16 @@ xfs_log_move_tail(xfs_mount_t *mp,
 
 /*
  * Determine if we have a transaction that has gone to disk
- * that needs to be covered. Log activity needs to be idle (no AIL and
- * nothing in the iclogs). And, we need to be in the right state indicating
- * something has gone out.
+ * that needs to be covered. To begin the transition to the idle state
+ * firstly the log needs to be idle (no AIL and nothing in the iclogs).
+ * If we are then in a state where covering is needed, the caller is informed
+ * that dummy transactions are required to move the log into the idle state.
+ *
+ * Because this is called as part of the sync process, we should also indicate
+ * that dummy transactions should be issued in anything but the covered or
+ * idle states. This ensures that the log tail is accurately reflected in
+ * the log at the end of the sync, hence if a crash occurrs avoids replay
+ * of transactions where the metadata is already on disk.
  */
 int
 xfs_log_need_covered(xfs_mount_t *mp)
@@ -759,17 +766,24 @@ xfs_log_need_covered(xfs_mount_t *mp)
                 return 0;
 
         spin_lock(&log->l_icloglock);
-        if (((log->l_covered_state == XLOG_STATE_COVER_NEED) ||
-                (log->l_covered_state == XLOG_STATE_COVER_NEED2))
-                        && !xfs_trans_ail_tail(log->l_ailp)
-                        && xlog_iclogs_empty(log)) {
-                if (log->l_covered_state == XLOG_STATE_COVER_NEED)
-                        log->l_covered_state = XLOG_STATE_COVER_DONE;
-                else {
-                        ASSERT(log->l_covered_state == XLOG_STATE_COVER_NEED2);
-                        log->l_covered_state = XLOG_STATE_COVER_DONE2;
+        switch (log->l_covered_state) {
+        case XLOG_STATE_COVER_DONE:
+        case XLOG_STATE_COVER_DONE2:
+        case XLOG_STATE_COVER_IDLE:
+                break;
+        case XLOG_STATE_COVER_NEED:
+        case XLOG_STATE_COVER_NEED2:
+                if (!xfs_trans_ail_tail(log->l_ailp) &&
+                    xlog_iclogs_empty(log)) {
+                        if (log->l_covered_state == XLOG_STATE_COVER_NEED)
+                                log->l_covered_state = XLOG_STATE_COVER_DONE;
+                        else
+                                log->l_covered_state = XLOG_STATE_COVER_DONE2;
                 }
+                /* FALLTHRU */
+        default:
                 needed = 1;
+                break;
         }
         spin_unlock(&log->l_icloglock);
         return needed;