Diffstat (limited to 'fs')
44 files changed, 355 insertions, 209 deletions
@@ -712,8 +712,16 @@ static ssize_t aio_run_iocb(struct kiocb *iocb)
 	 */
 	ret = retry(iocb);
 
-	if (ret != -EIOCBRETRY && ret != -EIOCBQUEUED)
+	if (ret != -EIOCBRETRY && ret != -EIOCBQUEUED) {
+		/*
+		 * There's no easy way to restart the syscall since other AIO's
+		 * may be already running. Just fail this IO with EINTR.
+		 */
+		if (unlikely(ret == -ERESTARTSYS || ret == -ERESTARTNOINTR ||
+			     ret == -ERESTARTNOHAND || ret == -ERESTART_RESTARTBLOCK))
+			ret = -EINTR;
 		aio_complete(iocb, ret, 0);
+	}
 out:
 	spin_lock_irq(&ctx->ctx_lock);
 
diff --git a/fs/ceph/Kconfig b/fs/ceph/Kconfig
index bc87b9c1d27e..0fcd2640c23f 100644
--- a/fs/ceph/Kconfig
+++ b/fs/ceph/Kconfig
@@ -3,6 +3,7 @@ config CEPH_FS
 	depends on INET && EXPERIMENTAL
 	select LIBCRC32C
 	select CRYPTO_AES
+	select CRYPTO
 	help
 	  Choose Y or M here to include support for mounting the
 	  experimental Ceph distributed file system. Ceph is an extremely
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 4cfce1ee31fa..efbc604001c8 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -411,8 +411,8 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
 	if (i_size < page_off + len)
 		len = i_size - page_off;
 
-	dout("writepage %p page %p index %lu on %llu~%u\n",
-	     inode, page, page->index, page_off, len);
+	dout("writepage %p page %p index %lu on %llu~%u snapc %p\n",
+	     inode, page, page->index, page_off, len, snapc);
 
 	writeback_stat = atomic_long_inc_return(&client->writeback_count);
 	if (writeback_stat >
@@ -766,7 +766,8 @@ get_more_pages:
 			/* ok */
 			if (locked_pages == 0) {
 				/* prepare async write request */
-				offset = page->index << PAGE_CACHE_SHIFT;
+				offset = (unsigned long long)page->index
+					<< PAGE_CACHE_SHIFT;
 				len = wsize;
 				req = ceph_osdc_new_request(&client->osdc,
 					    &ci->i_layout,
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index a2069b6680ae..5e9da996a151 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -814,7 +814,7 @@ int __ceph_caps_used(struct ceph_inode_info *ci)
 		used |= CEPH_CAP_PIN;
 	if (ci->i_rd_ref)
 		used |= CEPH_CAP_FILE_RD;
-	if (ci->i_rdcache_ref || ci->i_rdcache_gen)
+	if (ci->i_rdcache_ref || ci->vfs_inode.i_data.nrpages)
 		used |= CEPH_CAP_FILE_CACHE;
 	if (ci->i_wr_ref)
 		used |= CEPH_CAP_FILE_WR;
@@ -1195,10 +1195,14 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
 * asynchronously back to the MDS once sync writes complete and dirty
 * data is written out.
 *
+ * Unless @again is true, skip cap_snaps that were already sent to
+ * the MDS (i.e., during this session).
+ *
 * Called under i_lock.  Takes s_mutex as needed.
 */
 void __ceph_flush_snaps(struct ceph_inode_info *ci,
-			struct ceph_mds_session **psession)
+			struct ceph_mds_session **psession,
+			int again)
 		__releases(ci->vfs_inode->i_lock)
 		__acquires(ci->vfs_inode->i_lock)
 {
@@ -1227,7 +1231,7 @@ retry:
 		 * pages to be written out.
 		 */
 		if (capsnap->dirty_pages || capsnap->writing)
-			continue;
+			break;
 
 		/*
 		 * if cap writeback already occurred, we should have dropped
@@ -1240,6 +1244,13 @@ retry:
 			dout("no auth cap (migrating?), doing nothing\n");
 			goto out;
 		}
+
+		/* only flush each capsnap once */
+		if (!again && !list_empty(&capsnap->flushing_item)) {
+			dout("already flushed %p, skipping\n", capsnap);
+			continue;
+		}
+
 		mds = ci->i_auth_cap->session->s_mds;
 		mseq = ci->i_auth_cap->mseq;
 
@@ -1276,8 +1287,8 @@ retry:
 			 &session->s_cap_snaps_flushing);
 		spin_unlock(&inode->i_lock);
 
-		dout("flush_snaps %p cap_snap %p follows %lld size %llu\n",
-		     inode, capsnap, next_follows, capsnap->size);
+		dout("flush_snaps %p cap_snap %p follows %lld tid %llu\n",
+		     inode, capsnap, capsnap->follows, capsnap->flush_tid);
 		send_cap_msg(session, ceph_vino(inode).ino, 0,
 			     CEPH_CAP_OP_FLUSHSNAP, capsnap->issued, 0,
 			     capsnap->dirty, 0, capsnap->flush_tid, 0, mseq,
@@ -1314,7 +1325,7 @@ static void ceph_flush_snaps(struct ceph_inode_info *ci)
 	struct inode *inode = &ci->vfs_inode;
 
 	spin_lock(&inode->i_lock);
-	__ceph_flush_snaps(ci, NULL);
+	__ceph_flush_snaps(ci, NULL, 0);
 	spin_unlock(&inode->i_lock);
 }
 
@@ -1477,7 +1488,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
 
 		/* flush snaps first time around only */
 		if (!list_empty(&ci->i_cap_snaps))
-			__ceph_flush_snaps(ci, &session);
+			__ceph_flush_snaps(ci, &session, 0);
 		goto retry_locked;
 retry:
 	spin_lock(&inode->i_lock);
@@ -1894,7 +1905,7 @@ static void kick_flushing_capsnaps(struct ceph_mds_client *mdsc,
 		if (cap && cap->session == session) {
 			dout("kick_flushing_caps %p cap %p capsnap %p\n", inode,
 			     cap, capsnap);
-			__ceph_flush_snaps(ci, &session);
+			__ceph_flush_snaps(ci, &session, 1);
 		} else {
 			pr_err("%p auth cap %p not mds%d ???\n", inode,
 			       cap, session->s_mds);
@@ -2272,7 +2283,8 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
 {
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	int mds = session->s_mds;
-	int seq = le32_to_cpu(grant->seq);
+	unsigned seq = le32_to_cpu(grant->seq);
+	unsigned issue_seq = le32_to_cpu(grant->issue_seq);
 	int newcaps = le32_to_cpu(grant->caps);
 	int issued, implemented, used, wanted, dirty;
 	u64 size = le64_to_cpu(grant->size);
@@ -2284,8 +2296,8 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
 	int revoked_rdcache = 0;
 	int queue_invalidate = 0;
 
-	dout("handle_cap_grant inode %p cap %p mds%d seq %d %s\n",
-	     inode, cap, mds, seq, ceph_cap_string(newcaps));
+	dout("handle_cap_grant inode %p cap %p mds%d seq %u/%u %s\n",
+	     inode, cap, mds, seq, issue_seq, ceph_cap_string(newcaps));
 	dout(" size %llu max_size %llu, i_size %llu\n", size, max_size,
 	     inode->i_size);
 
@@ -2381,6 +2393,7 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
 	}
 
 	cap->seq = seq;
+	cap->issue_seq = issue_seq;
 
 	/* file layout may have changed */
 	ci->i_layout = grant->layout;
@@ -2763,15 +2776,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
 		if (op == CEPH_CAP_OP_IMPORT)
 			__queue_cap_release(session, vino.ino, cap_id,
 					    mseq, seq);
-
-		/*
-		 * send any full release message to try to move things
-		 * along for the mds (who clearly thinks we still have this
-		 * cap).
-		 */
-		ceph_add_cap_releases(mdsc, session);
-		ceph_send_cap_releases(mdsc, session);
-		goto done;
+		goto flush_cap_releases;
 	}
 
 	/* these will work even if we don't have a cap yet */
@@ -2799,7 +2804,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
 		dout(" no cap on %p ino %llx.%llx from mds%d\n",
 		     inode, ceph_ino(inode), ceph_snap(inode), mds);
 		spin_unlock(&inode->i_lock);
-		goto done;
+		goto flush_cap_releases;
 	}
 
 	/* note that each of these drops i_lock for us */
@@ -2823,6 +2828,17 @@ void ceph_handle_caps(struct ceph_mds_session *session,
 		       ceph_cap_op_name(op));
 	}
 
+	goto done;
+
+flush_cap_releases:
+	/*
+	 * send any full release message to try to move things
+	 * along for the mds (who clearly thinks we still have this
+	 * cap).
+	 */
+	ceph_add_cap_releases(mdsc, session);
+	ceph_send_cap_releases(mdsc, session);
+
 done:
 	mutex_unlock(&session->s_mutex);
 done_unlocked:
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 6e4f43ff23ec..a1986eb52045 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -1021,11 +1021,15 @@ out_touch:
 static void ceph_dentry_release(struct dentry *dentry)
 {
 	struct ceph_dentry_info *di = ceph_dentry(dentry);
-	struct inode *parent_inode = dentry->d_parent->d_inode;
-	u64 snapid = ceph_snap(parent_inode);
+	struct inode *parent_inode = NULL;
+	u64 snapid = CEPH_NOSNAP;
 
+	if (!IS_ROOT(dentry)) {
+		parent_inode = dentry->d_parent->d_inode;
+		if (parent_inode)
+			snapid = ceph_snap(parent_inode);
+	}
 	dout("dentry_release %p parent %p\n", dentry, parent_inode);
-
 	if (parent_inode && snapid != CEPH_SNAPDIR) {
 		struct ceph_inode_info *ci = ceph_inode(parent_inode);
 
diff --git a/fs/ceph/export.c b/fs/ceph/export.c
index 4480cb1c63e7..e38423e82f2e 100644
--- a/fs/ceph/export.c
+++ b/fs/ceph/export.c
@@ -42,32 +42,37 @@ struct ceph_nfs_confh {
 static int ceph_encode_fh(struct dentry *dentry, u32 *rawfh, int *max_len,
 			  int connectable)
 {
+	int type;
 	struct ceph_nfs_fh *fh = (void *)rawfh;
 	struct ceph_nfs_confh *cfh = (void *)rawfh;
 	struct dentry *parent = dentry->d_parent;
 	struct inode *inode = dentry->d_inode;
-	int type;
+	int connected_handle_length = sizeof(*cfh)/4;
+	int handle_length = sizeof(*fh)/4;
 
 	/* don't re-export snaps */
 	if (ceph_snap(inode) != CEPH_NOSNAP)
 		return -EINVAL;
 
-	if (*max_len >= sizeof(*cfh)) {
+	if (*max_len >= connected_handle_length) {
 		dout("encode_fh %p connectable\n", dentry);
 		cfh->ino = ceph_ino(dentry->d_inode);
 		cfh->parent_ino = ceph_ino(parent->d_inode);
 		cfh->parent_name_hash = parent->d_name.hash;
-		*max_len = sizeof(*cfh);
+		*max_len = connected_handle_length;
 		type = 2;
-	} else if (*max_len > sizeof(*fh)) {
-		if (connectable)
-			return -ENOSPC;
+	} else if (*max_len >= handle_length) {
+		if (connectable) {
+			*max_len = connected_handle_length;
+			return 255;
+		}
 		dout("encode_fh %p\n", dentry);
 		fh->ino = ceph_ino(dentry->d_inode);
-		*max_len = sizeof(*fh);
+		*max_len = handle_length;
 		type = 1;
 	} else {
-		return -ENOSPC;
+		*max_len = handle_length;
+		return 255;
 	}
 	return type;
 }
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 8c044a4f0457..66e4da6dba22 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -697,7 +697,7 @@ more:
 		 * start_request so that a tid has been assigned.
 		 */
 		spin_lock(&ci->i_unsafe_lock);
-		list_add(&ci->i_unsafe_writes, &req->r_unsafe_item);
+		list_add(&req->r_unsafe_item, &ci->i_unsafe_writes);
 		spin_unlock(&ci->i_unsafe_lock);
 		ceph_get_cap_refs(ci, CEPH_CAP_FILE_WR);
 	}
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index e7cca414da03..62377ec37edf 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -845,7 +845,7 @@ static void ceph_set_dentry_offset(struct dentry *dn)
 * the caller) if we fail.
 */
 static struct dentry *splice_dentry(struct dentry *dn, struct inode *in,
-				    bool *prehash)
+				    bool *prehash, bool set_offset)
 {
 	struct dentry *realdn;
 
@@ -877,7 +877,8 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in,
 	}
 	if ((!prehash || *prehash) && d_unhashed(dn))
 		d_rehash(dn);
-	ceph_set_dentry_offset(dn);
+	if (set_offset)
+		ceph_set_dentry_offset(dn);
 out:
 	return dn;
 }
@@ -1062,7 +1063,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
 			d_delete(dn);
 			goto done;
 		}
-		dn = splice_dentry(dn, in, &have_lease);
+		dn = splice_dentry(dn, in, &have_lease, true);
 		if (IS_ERR(dn)) {
 			err = PTR_ERR(dn);
 			goto done;
@@ -1105,7 +1106,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
 			goto done;
 		}
 		dout(" linking snapped dir %p to dn %p\n", in, dn);
-		dn = splice_dentry(dn, in, NULL);
+		dn = splice_dentry(dn, in, NULL, true);
 		if (IS_ERR(dn)) {
 			err = PTR_ERR(dn);
 			goto done;
@@ -1237,7 +1238,7 @@ retry_lookup:
 				err = PTR_ERR(in);
 				goto out;
 			}
-			dn = splice_dentry(dn, in, NULL);
+			dn = splice_dentry(dn, in, NULL, false);
 			if (IS_ERR(dn))
 				dn = NULL;
 		}
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index f091b1351786..fad95f8f2608 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -2374,6 +2374,8 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
 					       num_fcntl_locks,
 					       num_flock_locks);
 		unlock_kernel();
+	} else {
+		err = ceph_pagelist_append(pagelist, &rec, reclen);
 	}
 
 out_free:
diff --git a/fs/ceph/osd_client.c b/fs/ceph/osd_client.c
index dfced1dacbcd..3b5571b8ce22 100644
--- a/fs/ceph/osd_client.c
+++ b/fs/ceph/osd_client.c
@@ -549,7 +549,7 @@ static void __unregister_request(struct ceph_osd_client *osdc,
 */
 static void __cancel_request(struct ceph_osd_request *req)
 {
-	if (req->r_sent) {
+	if (req->r_sent && req->r_osd) {
 		ceph_con_revoke(&req->r_osd->o_con, req->r_request);
 		req->r_sent = 0;
 	}
diff --git a/fs/ceph/pagelist.c b/fs/ceph/pagelist.c
index b6859f47d364..46a368b6dce5 100644
--- a/fs/ceph/pagelist.c
+++ b/fs/ceph/pagelist.c
@@ -5,10 +5,18 @@
 
 #include "pagelist.h"
 
+static void ceph_pagelist_unmap_tail(struct ceph_pagelist *pl)
+{
+	struct page *page = list_entry(pl->head.prev, struct page,
+				       lru);
+	kunmap(page);
+}
+
 int ceph_pagelist_release(struct ceph_pagelist *pl)
 {
 	if (pl->mapped_tail)
-		kunmap(pl->mapped_tail);
+		ceph_pagelist_unmap_tail(pl);
+
 	while (!list_empty(&pl->head)) {
 		struct page *page = list_first_entry(&pl->head, struct page,
 						     lru);
@@ -26,7 +34,7 @@ static int ceph_pagelist_addpage(struct ceph_pagelist *pl)
 	pl->room += PAGE_SIZE;
 	list_add_tail(&page->lru, &pl->head);
 	if (pl->mapped_tail)
-		kunmap(pl->mapped_tail);
+		ceph_pagelist_unmap_tail(pl);
 	pl->mapped_tail = kmap(page);
 	return 0;
 }
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index 4868b9dcac5a..190b6c4a6f2b 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -119,6 +119,7 @@ static struct ceph_snap_realm *ceph_create_snap_realm(
 	INIT_LIST_HEAD(&realm->children);
 	INIT_LIST_HEAD(&realm->child_item);
 	INIT_LIST_HEAD(&realm->empty_item);
+	INIT_LIST_HEAD(&realm->dirty_item);
 	INIT_LIST_HEAD(&realm->inodes_with_caps);
 	spin_lock_init(&realm->inodes_with_caps_lock);
 	__insert_snap_realm(&mdsc->snap_realms, realm);
@@ -467,7 +468,7 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
 	INIT_LIST_HEAD(&capsnap->ci_item);
 	INIT_LIST_HEAD(&capsnap->flushing_item);
 
-	capsnap->follows = snapc->seq - 1;
+	capsnap->follows = snapc->seq;
 	capsnap->issued = __ceph_caps_issued(ci, NULL);
 	capsnap->dirty = dirty;
 
@@ -604,6 +605,7 @@ int ceph_update_snap_trace(struct ceph_mds_client *mdsc,
 	struct ceph_snap_realm *realm;
 	int invalidate = 0;
 	int err = -ENOMEM;
+	LIST_HEAD(dirty_realms);
 
 	dout("update_snap_trace deletion=%d\n", deletion);
 more:
@@ -626,24 +628,6 @@ more:
 		}
 	}
 
-	if (le64_to_cpu(ri->seq) > realm->seq) {
-		dout("update_snap_trace updating %llx %p %lld -> %lld\n",
-		     realm->ino, realm, realm->seq, le64_to_cpu(ri->seq));
-		/*
-		 * if the realm seq has changed, queue a cap_snap for every
-		 * inode with open caps.  we do this _before_ we update
-		 * the realm info so that we prepare for writeback under the
-		 * _previous_ snap context.
-		 *
-		 * ...unless it's a snap deletion!
-		 */
-		if (!deletion)
-			queue_realm_cap_snaps(realm);
-	} else {
-		dout("update_snap_trace %llx %p seq %lld unchanged\n",
-		     realm->ino, realm, realm->seq);
-	}
-
 	/* ensure the parent is correct */
 	err = adjust_snap_realm_parent(mdsc, realm, le64_to_cpu(ri->parent));
 	if (err < 0)
@@ -651,6 +635,8 @@ more:
 	invalidate += err;
 
 	if (le64_to_cpu(ri->seq) > realm->seq) {
+		dout("update_snap_trace updating %llx %p %lld -> %lld\n",
+		     realm->ino, realm, realm->seq, le64_to_cpu(ri->seq));
 		/* update realm parameters, snap lists */
 		realm->seq = le64_to_cpu(ri->seq);
 		realm->created = le64_to_cpu(ri->created);
@@ -668,9 +654,17 @@ more:
 		if (err < 0)
 			goto fail;
 
+		/* queue realm for cap_snap creation */
+		list_add(&realm->dirty_item, &dirty_realms);
+
 		invalidate = 1;
 	} else if (!realm->cached_context) {
+		dout("update_snap_trace %llx %p seq %lld new\n",
+		     realm->ino, realm, realm->seq);
 		invalidate = 1;
+	} else {
+		dout("update_snap_trace %llx %p seq %lld unchanged\n",
+		     realm->ino, realm, realm->seq);
 	}
 
 	dout("done with %llx %p, invalidated=%d, %p %p\n", realm->ino,
@@ -683,6 +677,14 @@ more:
 	if (invalidate)
 		rebuild_snap_realms(realm);
 
+	/*
+	 * queue cap snaps _after_ we've built the new snap contexts,
+	 * so that i_head_snapc can be set appropriately.
+	 */
+	list_for_each_entry(realm, &dirty_realms, dirty_item) {
+		queue_realm_cap_snaps(realm);
+	}
+
 	__cleanup_empty_realms(mdsc);
 	return 0;
 
@@ -715,7 +717,7 @@ static void flush_snaps(struct ceph_mds_client *mdsc)
 		igrab(inode);
 		spin_unlock(&mdsc->snap_flush_lock);
 		spin_lock(&inode->i_lock);
-		__ceph_flush_snaps(ci, &session);
+		__ceph_flush_snaps(ci, &session, 0);
 		spin_unlock(&inode->i_lock);
 		iput(inode);
 		spin_lock(&mdsc->snap_flush_lock);
@@ -816,6 +818,7 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
 			};
 			struct inode *inode = ceph_find_inode(sb, vino);
 			struct ceph_inode_info *ci;
+			struct ceph_snap_realm *oldrealm;
 
 			if (!inode)
 				continue;
@@ -841,18 +844,19 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
 			dout(" will move %p to split realm %llx %p\n",
 			     inode, realm->ino, realm);
 			/*
-			 * Remove the inode from the realm's inode
-			 * list, but don't add it to the new realm
-			 * yet.  We don't want the cap_snap to be
-			 * queued (again) by ceph_update_snap_trace()
-			 * below.  Queue it _now_, under the old context.
+			 * Move the inode to the new realm
 			 */
 			spin_lock(&realm->inodes_with_caps_lock);
 			list_del_init(&ci->i_snap_realm_item);
+			list_add(&ci->i_snap_realm_item,
+				 &realm->inodes_with_caps);
+			oldrealm = ci->i_snap_realm;
+			ci->i_snap_realm = realm;
 			spin_unlock(&realm->inodes_with_caps_lock);
 			spin_unlock(&inode->i_lock);
 
-			ceph_queue_cap_snap(ci);
+			ceph_get_snap_realm(mdsc, realm);
+			ceph_put_snap_realm(mdsc, oldrealm);
 
 			iput(inode);
 			continue;
@@ -880,43 +884,9 @@ skip_inode:
 	ceph_update_snap_trace(mdsc, p, e,
 			       op == CEPH_SNAP_OP_DESTROY);
 
-	if (op == CEPH_SNAP_OP_SPLIT) {
-		/*
-		 * ok, _now_ add the inodes into the new realm.
-		 */
-		for (i = 0; i < num_split_inos; i++) {
-			struct ceph_vino vino = {
-				.ino = le64_to_cpu(split_inos[i]),
-				.snap = CEPH_NOSNAP,
-			};
-			struct inode *inode = ceph_find_inode(sb, vino);
-			struct ceph_inode_info *ci;
-
-			if (!inode)
-				continue;
-			ci = ceph_inode(inode);
-			spin_lock(&inode->i_lock);
-			if (list_empty(&ci->i_snap_realm_item)) {
-				struct ceph_snap_realm *oldrealm =
-					ci->i_snap_realm;
-
-				dout(" moving %p to split realm %llx %p\n",
-				     inode, realm->ino, realm);
-				spin_lock(&realm->inodes_with_caps_lock);
-				list_add(&ci->i_snap_realm_item,
-					 &realm->inodes_with_caps);
-				ci->i_snap_realm = realm;
-				spin_unlock(&realm->inodes_with_caps_lock);
-				ceph_get_snap_realm(mdsc, realm);
-				ceph_put_snap_realm(mdsc, oldrealm);
-			}
-			spin_unlock(&inode->i_lock);
-			iput(inode);
-		}
-
+	if (op == CEPH_SNAP_OP_SPLIT)
 		/* we took a reference when we created the realm, above */
 		ceph_put_snap_realm(mdsc, realm);
-	}
 
 	__cleanup_empty_realms(mdsc);
 
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index c33897ae5725..b87638e84c4b 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -690,6 +690,8 @@ struct ceph_snap_realm {
 
 	struct list_head empty_item;     /* if i have ref==0 */
 
+	struct list_head dirty_item;     /* if realm needs new context */
+
 	/* the current set of snaps for this realm */
 	struct ceph_snap_context *cached_context;
 
@@ -826,7 +828,8 @@ extern void ceph_put_cap_refs(struct ceph_inode_info *ci, int had);
 extern void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
 				       struct ceph_snap_context *snapc);
 extern void __ceph_flush_snaps(struct ceph_inode_info *ci,
-			       struct ceph_mds_session **psession);
+			       struct ceph_mds_session **psession,
+			       int again);
 extern void ceph_check_caps(struct ceph_inode_info *ci, int flags,
 			    struct ceph_mds_session *session);
 extern void ceph_check_delayed_caps(struct ceph_mds_client *mdsc);
diff --git a/fs/char_dev.c b/fs/char_dev.c
index f80a4f25123c..143d393881cb 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -40,7 +40,9 @@ struct backing_dev_info directly_mappable_cdev_bdi = {
 #endif
 		/* permit direct mmap, for read, write or exec */
 		BDI_CAP_MAP_DIRECT |
-		BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP),
+		BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP |
+		/* no writeback happens */
+		BDI_CAP_NO_ACCT_AND_WRITEBACK),
 };
 
 static struct kobj_map *cdev_map;
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index c65c3419dd37..7e83b356cc9e 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -232,7 +232,7 @@ static int
 small_smb_init(int smb_command, int wct, struct cifsTconInfo *tcon,
 	       void **request_buf)
 {
-	int rc = 0;
+	int rc;
 
 	rc = cifs_reconnect_tcon(tcon, smb_command);
 	if (rc)
@@ -250,7 +250,7 @@ small_smb_init(int smb_command, int wct, struct cifsTconInfo *tcon,
 	if (tcon != NULL)
 		cifs_stats_inc(&tcon->num_smbs_sent);
 
-	return rc;
+	return 0;
 }
 
 int
@@ -281,16 +281,9 @@ small_smb_init_no_tc(const int smb_command, const int wct,
 
 /* If the return code is zero, this function must fill in request_buf pointer */
 static int
-smb_init(int smb_command, int wct, struct cifsTconInfo *tcon,
-	 void **request_buf /* returned */ ,
-	 void **response_buf /* returned */ )
+__smb_init(int smb_command, int wct, struct cifsTconInfo *tcon,
+	   void **request_buf, void **response_buf)
 {
-	int rc = 0;
-
-	rc = cifs_reconnect_tcon(tcon, smb_command);
-	if (rc)
-		return rc;
-
 	*request_buf = cifs_buf_get();
 	if (*request_buf == NULL) {
 		/* BB should we add a retry in here if not a writepage? */
@@ -309,7 +302,31 @@ smb_init(int smb_command, int wct, struct cifsTconInfo *tcon,
 	if (tcon != NULL)
 		cifs_stats_inc(&tcon->num_smbs_sent);
 
-	return rc;
+	return 0;
+}
+
+/* If the return code is zero, this function must fill in request_buf pointer */
+static int
+smb_init(int smb_command, int wct, struct cifsTconInfo *tcon,
+	 void **request_buf, void **response_buf)
+{
+	int rc;
+
+	rc = cifs_reconnect_tcon(tcon, smb_command);
+	if (rc)
+		return rc;
+
+	return __smb_init(smb_command, wct, tcon, request_buf, response_buf);
+}
+
+static int
+smb_init_no_reconnect(int smb_command, int wct, struct cifsTconInfo *tcon,
+		      void **request_buf, void **response_buf)
+{
+	if (tcon->ses->need_reconnect || tcon->need_reconnect)
+		return -EHOSTDOWN;
+
+	return __smb_init(smb_command, wct, tcon, request_buf, response_buf);
 }
 
 static int validate_t2(struct smb_t2_rsp *pSMB)
@@ -4534,8 +4551,8 @@ CIFSSMBQFSUnixInfo(const int xid, struct cifsTconInfo *tcon)
 
 	cFYI(1, "In QFSUnixInfo");
 QFSUnixRetry:
-	rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB,
-		      (void **) &pSMBr);
+	rc = smb_init_no_reconnect(SMB_COM_TRANSACTION2, 15, tcon,
+				   (void **) &pSMB, (void **) &pSMBr);
 	if (rc)
 		return rc;
 
@@ -4604,8 +4621,8 @@ CIFSSMBSetFSUnixInfo(const int xid, struct cifsTconInfo *tcon, __u64 cap)
 	cFYI(1, "In SETFSUnixInfo");
 SETFSUnixRetry:
 	/* BB switch to small buf init to save memory */
-	rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB,
-		      (void **) &pSMBr);
+	rc = smb_init_no_reconnect(SMB_COM_TRANSACTION2, 15, tcon,
+				   (void **) &pSMB, (void **) &pSMBr);
 	if (rc)
 		return rc;
 
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 93f77d438d3c..53cce8cc2224 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -801,6 +801,8 @@ retry_iget5_locked:
 		inode->i_flags |= S_NOATIME | S_NOCMTIME;
 		if (inode->i_state & I_NEW) {
 			inode->i_ino = hash;
+			if (S_ISREG(inode->i_mode))
+				inode->i_data.backing_dev_info = sb->s_bdi;
 #ifdef CONFIG_CIFS_FSCACHE
 			/* initialize per-inode cache cookie pointer */
 			CIFS_I(inode)->fscache = NULL;
diff --git a/fs/compat.c b/fs/compat.c
index 718c7062aec1..0644a154672b 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1153,7 +1153,7 @@ static ssize_t compat_do_readv_writev(int type, struct file *file,
 {
 	compat_ssize_t tot_len;
 	struct iovec iovstack[UIO_FASTIOV];
-	struct iovec *iov;
+	struct iovec *iov = iovstack;
 	ssize_t ret;
 	io_fn_t fn;
 	iov_fn_t fnv;
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index eb7368ebd8cd..3eadd97324b1 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -54,6 +54,9 @@ struct page_collect {
 	unsigned nr_pages;
 	unsigned long length;
 	loff_t pg_first; /* keep 64bit also in 32-arches */
+	bool read_4_write; /* This means two things: that the read is sync
+			    * And the pages should not be unlocked.
+			    */
 };
 
 static void _pcol_init(struct page_collect *pcol, unsigned expected_pages,
@@ -71,6 +74,7 @@ static void _pcol_init(struct page_collect *pcol, unsigned expected_pages,
 	pcol->nr_pages = 0;
 	pcol->length = 0;
 	pcol->pg_first = -1;
+	pcol->read_4_write = false;
 }
 
 static void _pcol_reset(struct page_collect *pcol)
@@ -347,7 +351,8 @@ static int readpage_strip(void *data, struct page *page)
 		if (PageError(page))
 			ClearPageError(page);
 
-		unlock_page(page);
+		if (!pcol->read_4_write)
+			unlock_page(page);
 		EXOFS_DBGMSG("readpage_strip(0x%lx, 0x%lx) empty page,"
 			     " splitting\n", inode->i_ino, page->index);
 
@@ -428,6 +433,7 @@ static int _readpage(struct page *page, bool is_sync)
 	/* readpage_strip might call read_exec(,is_sync==false) at several
 	 * places but not if we have a single page.
 	 */
+	pcol.read_4_write = is_sync;
 	ret = readpage_strip(&pcol, page);
 	if (ret) {
 		EXOFS_ERR("_readpage => %d\n", ret);
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 81e086d8aa57..ab38fef1c9a1 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -52,8 +52,6 @@ struct wb_writeback_work {
 #define CREATE_TRACE_POINTS
 #include <trace/events/writeback.h>
 
-#define inode_to_bdi(inode)	((inode)->i_mapping->backing_dev_info)
-
 /*
 * We don't actually have pdflush, but this one is exported though /proc...
 */
@@ -71,6 +69,16 @@ int writeback_in_progress(struct backing_dev_info *bdi)
 	return test_bit(BDI_writeback_running, &bdi->state);
 }
 
+static inline struct backing_dev_info *inode_to_bdi(struct inode *inode)
+{
+	struct super_block *sb = inode->i_sb;
+
+	if (strcmp(sb->s_type->name, "bdev") == 0)
+		return inode->i_mapping->backing_dev_info;
+
+	return sb->s_bdi;
+}
+
 static void bdi_queue_work(struct backing_dev_info *bdi,
 			   struct wb_writeback_work *work)
 {
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index d367af1514ef..cde755cca564 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -1354,7 +1354,7 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
 	loff_t file_size;
 	unsigned int num;
 	unsigned int offset;
-	size_t total_len;
+	size_t total_len = 0;
 
 	req = fuse_get_req(fc);
 	if (IS_ERR(req))
diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h
index cdfb8c6a4206..c16f8d8331b5 100644
--- a/fs/nfsd/nfsfh.h
+++ b/fs/nfsd/nfsfh.h
@@ -196,8 +196,6 @@ fh_lock(struct svc_fh *fhp)
 static inline void
 fh_unlock(struct svc_fh *fhp)
 {
-	BUG_ON(!fhp->fh_dentry);
-
 	if (fhp->fh_locked) {
 		fill_post_wcc(fhp);
 		mutex_unlock(&fhp->fh_dentry->d_inode->i_mutex);
diff --git a/fs/notify/Kconfig b/fs/notify/Kconfig
index 22c629eedd82..b388443c3a09 100644
--- a/fs/notify/Kconfig
+++ b/fs/notify/Kconfig
@@ -3,4 +3,4 @@ config FSNOTIFY
 
 source "fs/notify/dnotify/Kconfig"
 source "fs/notify/inotify/Kconfig"
-source "fs/notify/fanotify/Kconfig"
+#source "fs/notify/fanotify/Kconfig"
diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c
index a76e0aa5cd3f..391915093fe1 100644
--- a/fs/ocfs2/acl.c
+++ b/fs/ocfs2/acl.c
@@ -209,7 +209,10 @@ static int ocfs2_acl_set_mode(struct inode *inode, struct buffer_head *di_bh,
 	}
 
 	inode->i_mode = new_mode;
+	inode->i_ctime = CURRENT_TIME;
 	di->i_mode = cpu_to_le16(inode->i_mode);
+	di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
+	di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
 
 	ocfs2_journal_dirty(handle, di_bh);
 
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index 1361997cf205..cbe2f057cc28 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -977,7 +977,7 @@ static int o2net_tx_can_proceed(struct o2net_node *nn,
 int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *caller_vec,
 			   size_t caller_veclen, u8 target_node, int *status)
 {
-	int ret;
+	int ret = 0;
 	struct o2net_msg *msg = NULL;
 	size_t veclen, caller_bytes = 0;
 	struct kvec *vec = NULL;
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index f04ebcfffc4a..c49f6de0e7ab 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -3931,6 +3931,15 @@ static int ocfs2_dx_dir_rebalance(struct ocfs2_super *osb, struct inode *dir,
 		goto out_commit;
 	}
 
+	cpos = split_hash;
+	ret = ocfs2_dx_dir_new_cluster(dir, &et, cpos, handle,
+				       data_ac, meta_ac, new_dx_leaves,
+				       num_dx_leaves);
+	if (ret) {
+		mlog_errno(ret);
+		goto out_commit;
+	}
+
 	for (i = 0; i < num_dx_leaves; i++) {
 		ret = ocfs2_journal_access_dl(handle, INODE_CACHE(dir),
 					      orig_dx_leaves[i],
@@ -3939,15 +3948,14 @@ static int ocfs2_dx_dir_rebalance(struct ocfs2_super *osb, struct inode *dir,
 			mlog_errno(ret);
 			goto out_commit;
 		}
-	}
 
-	cpos = split_hash;
-	ret = ocfs2_dx_dir_new_cluster(dir, &et, cpos, handle,
-				       data_ac, meta_ac, new_dx_leaves,
-				       num_dx_leaves);
-	if (ret) {
-		mlog_errno(ret);
-		goto out_commit;
+		ret = ocfs2_journal_access_dl(handle, INODE_CACHE(dir),
+					      new_dx_leaves[i],
+					      OCFS2_JOURNAL_ACCESS_WRITE);
+		if (ret) {
+			mlog_errno(ret);
+			goto out_commit;
+		}
 	}
 
 	ocfs2_dx_dir_transfer_leaf(dir, split_hash, handle, tmp_dx_leaf,
diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h
index 4b6ae2c13b47..765298908f1d 100644
--- a/fs/ocfs2/dlm/dlmcommon.h
+++ b/fs/ocfs2/dlm/dlmcommon.h
@@ -1030,6 +1030,7 @@ int dlm_drop_lockres_ref(struct dlm_ctxt *dlm,
 			 struct dlm_lock_resource *res);
 void dlm_clean_master_list(struct dlm_ctxt *dlm,
 			   u8 dead_node);
+void dlm_force_free_mles(struct dlm_ctxt *dlm);
 int dlm_lock_basts_flushed(struct dlm_ctxt *dlm, struct dlm_lock *lock);
 int __dlm_lockres_has_locks(struct dlm_lock_resource *res);
 int __dlm_lockres_unused(struct dlm_lock_resource *res);
diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c
index 5efdd37dfe48..901ca52bf86b 100644
--- a/fs/ocfs2/dlm/dlmdebug.c
+++ b/fs/ocfs2/dlm/dlmdebug.c
@@ -636,8 +636,14 @@ static void *lockres_seq_start(struct seq_file *m, loff_t *pos)
 	spin_lock(&dlm->track_lock);
 	if (oldres)
 		track_list = &oldres->tracking;
-	else
+	else {
 		track_list = &dlm->tracking_list;
+		if (list_empty(track_list)) {
+			dl = NULL;
+			spin_unlock(&dlm->track_lock);
+			goto bail;
+		}
+	}
 
 	list_for_each_entry(res, track_list, tracking) {
 		if (&res->tracking == &dlm->tracking_list)
@@ -660,6 +666,7 @@ static void *lockres_seq_start(struct seq_file *m, loff_t *pos)
 	} else
 		dl = NULL;
 
+bail:
 	/* passed to seq_show */
 	return dl;
 }
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 153abb5abef0..11a5c87fd7f7 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -693,6 +693,7 @@ void dlm_unregister_domain(struct dlm_ctxt *dlm)
 
 		dlm_mark_domain_leaving(dlm);
 		dlm_leave_domain(dlm);
+		dlm_force_free_mles(dlm);
 		dlm_complete_dlm_shutdown(dlm);
 	}
 	dlm_put(dlm);
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index ffb4c68dafa4..f564b0e5f80d 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -3433,3 +3433,43 @@ void dlm_lockres_release_ast(struct dlm_ctxt *dlm,
 	wake_up(&res->wq);
 	wake_up(&dlm->migration_wq);
 }
+
+void dlm_force_free_mles(struct dlm_ctxt *dlm)
+{
+	int i;
+	struct hlist_head *bucket;
+	struct dlm_master_list_entry *mle;
+	struct hlist_node *tmp, *list;
+
+	/*
+	 * We notified all other nodes that we are exiting the domain and
+	 * marked the dlm state to DLM_CTXT_LEAVING. If any mles are still
+	 * around we force free them and wake any processes that are waiting
+	 * on the mles
+	 */
+	spin_lock(&dlm->spinlock);
+	spin_lock(&dlm->master_lock);
+
+	BUG_ON(dlm->dlm_state != DLM_CTXT_LEAVING);
+	BUG_ON((find_next_bit(dlm->domain_map, O2NM_MAX_NODES, 0) < O2NM_MAX_NODES));
+
+	for (i = 0; i < DLM_HASH_BUCKETS; i++) {
+		bucket = dlm_master_hash(dlm, i);
+		hlist_for_each_safe(list, tmp, bucket) {
+			mle = hlist_entry(list, struct dlm_master_list_entry,
+					  master_hash_node);
+			if (mle->type != DLM_MLE_BLOCK) {
+				mlog(ML_ERROR, "bad mle: %p\n", mle);
+				dlm_print_one_mle(mle);
+			}
+			atomic_set(&mle->woken, 1);
+			wake_up(&mle->wq);
+
+			__dlm_unlink_mle(dlm, mle);
+			__dlm_mle_detach_hb_events(dlm, mle);
+			__dlm_put_mle(mle);
+		}
+	}
+	spin_unlock(&dlm->master_lock);
+	spin_unlock(&dlm->spinlock);
+}
diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h
index d1ce48e1b3d6..1d596d8c4a4a 100644
--- a/fs/ocfs2/dlmglue.h
+++ b/fs/ocfs2/dlmglue.h
@@ -84,6 +84,7 @@ enum {
 	OI_LS_PARENT,
 	OI_LS_RENAME1,
 	OI_LS_RENAME2,
+	OI_LS_REFLINK_TARGET,
 };
 
 int ocfs2_dlm_init(struct ocfs2_super *osb);
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index 33f1c9a8258d..fa31d05e41b7 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -235,18 +235,31 @@
 #define OCFS2_HAS_REFCOUNT_FL   (0x0010)
 
 /* Inode attributes, keep in sync with EXT2 */
-#define OCFS2_SECRM_FL		(0x00000001)	/* Secure deletion */
-#define OCFS2_UNRM_FL		(0x00000002)	/* Undelete */
-#define OCFS2_COMPR_FL		(0x00000004)	/* Compress file */
-#define OCFS2_SYNC_FL		(0x00000008)	/* Synchronous updates */
-#define OCFS2_IMMUTABLE_FL	(0x00000010)	/* Immutable file */
-#define OCFS2_APPEND_FL		(0x00000020)	/* writes to file may only append */
-#define OCFS2_NODUMP_FL		(0x00000040)	/* do not dump file */
-#define OCFS2_NOATIME_FL	(0x00000080)	/* do not update atime */
-#define OCFS2_DIRSYNC_FL	(0x00010000)	/* dirsync behaviour (directories only) */
-
-#define OCFS2_FL_VISIBLE	(0x000100FF)	/* User visible flags */
-#define OCFS2_FL_MODIFIABLE	(0x000100FF)	/* User modifiable flags */
+#define OCFS2_SECRM_FL		FS_SECRM_FL	/* Secure deletion */
+#define OCFS2_UNRM_FL		FS_UNRM_FL	/* Undelete */
+#define OCFS2_COMPR_FL		FS_COMPR_FL	/* Compress file */
+#define OCFS2_SYNC_FL		FS_SYNC_FL	/* Synchronous updates */
+#define OCFS2_IMMUTABLE_FL	FS_IMMUTABLE_FL	/* Immutable file */
+#define OCFS2_APPEND_FL		FS_APPEND_FL	/* writes to file may only append */
+#define OCFS2_NODUMP_FL		FS_NODUMP_FL	/* do not dump file */
+#define OCFS2_NOATIME_FL	FS_NOATIME_FL	/* do not update atime */
+/* Reserved for compression usage... */
+#define OCFS2_DIRTY_FL		FS_DIRTY_FL
+#define OCFS2_COMPRBLK_FL	FS_COMPRBLK_FL	/* One or more compressed clusters */
+#define OCFS2_NOCOMP_FL		FS_NOCOMP_FL	/* Don't compress */
+#define OCFS2_ECOMPR_FL		FS_ECOMPR_FL	/* Compression error */
+/* End compression flags --- maybe not all used */
+#define OCFS2_BTREE_FL		FS_BTREE_FL	/* btree format dir */
+#define OCFS2_INDEX_FL		FS_INDEX_FL	/* hash-indexed directory */
+#define OCFS2_IMAGIC_FL		FS_IMAGIC_FL	/* AFS directory */
+#define OCFS2_JOURNAL_DATA_FL	FS_JOURNAL_DATA_FL /* Reserved for ext3 */
+#define OCFS2_NOTAIL_FL		FS_NOTAIL_FL	/* file tail should not be merged */
+#define OCFS2_DIRSYNC_FL	FS_DIRSYNC_FL	/* dirsync behaviour (directories only) */
+#define OCFS2_TOPDIR_FL		FS_TOPDIR_FL	/* Top of directory hierarchies*/
+#define OCFS2_RESERVED_FL	FS_RESERVED_FL	/* reserved for ext2 lib */
+
+#define OCFS2_FL_VISIBLE	FS_FL_USER_VISIBLE	/* User visible flags */
+#define OCFS2_FL_MODIFIABLE	FS_FL_USER_MODIFIABLE	/* User modifiable flags */
 
 /*
 * Extent record flags (e_node.leaf.flags)
diff --git a/fs/ocfs2/ocfs2_ioctl.h b/fs/ocfs2/ocfs2_ioctl.h index 2d3420af1a83..5d241505690b 100644 --- a/fs/ocfs2/ocfs2_ioctl.h +++ b/fs/ocfs2/ocfs2_ioctl.h | |||
@@ -23,10 +23,10 @@ | |||
23 | /* | 23 | /* |
24 | * ioctl commands | 24 | * ioctl commands |
25 | */ | 25 | */ |
26 | #define OCFS2_IOC_GETFLAGS _IOR('f', 1, long) | 26 | #define OCFS2_IOC_GETFLAGS FS_IOC_GETFLAGS |
27 | #define OCFS2_IOC_SETFLAGS _IOW('f', 2, long) | 27 | #define OCFS2_IOC_SETFLAGS FS_IOC_SETFLAGS |
28 | #define OCFS2_IOC32_GETFLAGS _IOR('f', 1, int) | 28 | #define OCFS2_IOC32_GETFLAGS FS_IOC32_GETFLAGS |
29 | #define OCFS2_IOC32_SETFLAGS _IOW('f', 2, int) | 29 | #define OCFS2_IOC32_SETFLAGS FS_IOC32_SETFLAGS |
30 | 30 | ||
31 | /* | 31 | /* |
32 | * Space reservation / allocation / free ioctls and argument structure | 32 | * Space reservation / allocation / free ioctls and argument structure |
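
With OCFS2's ioctl numbers now literally the generic FS_IOC_* ones, the usual chattr/lsattr style sequence applies unchanged. A small userspace sketch of that usage (the mount path is hypothetical, and the int-sized flags variable follows the convention lsattr uses):

/* getflags.c - query and set inode attribute flags through the generic
 * FS_IOC_GETFLAGS/FS_IOC_SETFLAGS ioctls, which OCFS2 now reuses directly.
 */
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fs.h>

int main(int argc, char **argv)
{
    const char *path = argc > 1 ? argv[1] : "/mnt/ocfs2/testfile"; /* hypothetical path */
    int flags = 0;              /* lsattr-style: the kernel copies an int here */
    int fd = open(path, O_RDONLY);

    if (fd < 0) {
        perror("open");
        return 1;
    }
    if (ioctl(fd, FS_IOC_GETFLAGS, &flags) < 0) {
        perror("FS_IOC_GETFLAGS");
        return 1;
    }
    printf("flags: %#x (noatime=%d)\n", flags, !!(flags & FS_NOATIME_FL));

    flags |= FS_NOATIME_FL;     /* ask for no atime updates */
    if (ioctl(fd, FS_IOC_SETFLAGS, &flags) < 0)
        perror("FS_IOC_SETFLAGS");

    close(fd);
    return 0;
}
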
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index 0afeda83120f..efdd75607406 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c | |||
@@ -4201,8 +4201,9 @@ static int __ocfs2_reflink(struct dentry *old_dentry, | |||
4201 | goto out; | 4201 | goto out; |
4202 | } | 4202 | } |
4203 | 4203 | ||
4204 | mutex_lock(&new_inode->i_mutex); | 4204 | mutex_lock_nested(&new_inode->i_mutex, I_MUTEX_CHILD); |
4205 | ret = ocfs2_inode_lock(new_inode, &new_bh, 1); | 4205 | ret = ocfs2_inode_lock_nested(new_inode, &new_bh, 1, |
4206 | OI_LS_REFLINK_TARGET); | ||
4206 | if (ret) { | 4207 | if (ret) { |
4207 | mlog_errno(ret); | 4208 | mlog_errno(ret); |
4208 | goto out_unlock; | 4209 | goto out_unlock; |
diff --git a/fs/ocfs2/reservations.c b/fs/ocfs2/reservations.c index d8b6e4259b80..3e78db361bc7 100644 --- a/fs/ocfs2/reservations.c +++ b/fs/ocfs2/reservations.c | |||
@@ -732,25 +732,23 @@ int ocfs2_resmap_resv_bits(struct ocfs2_reservation_map *resmap, | |||
732 | struct ocfs2_alloc_reservation *resv, | 732 | struct ocfs2_alloc_reservation *resv, |
733 | int *cstart, int *clen) | 733 | int *cstart, int *clen) |
734 | { | 734 | { |
735 | unsigned int wanted = *clen; | ||
736 | |||
737 | if (resv == NULL || ocfs2_resmap_disabled(resmap)) | 735 | if (resv == NULL || ocfs2_resmap_disabled(resmap)) |
738 | return -ENOSPC; | 736 | return -ENOSPC; |
739 | 737 | ||
740 | spin_lock(&resv_lock); | 738 | spin_lock(&resv_lock); |
741 | 739 | ||
742 | /* | ||
743 | * We don't want to over-allocate for temporary | ||
744 | * windows. Otherwise, we run the risk of fragmenting the | ||
745 | * allocation space. | ||
746 | */ | ||
747 | wanted = ocfs2_resv_window_bits(resmap, resv); | ||
748 | if ((resv->r_flags & OCFS2_RESV_FLAG_TMP) || wanted < *clen) | ||
749 | wanted = *clen; | ||
750 | |||
751 | if (ocfs2_resv_empty(resv)) { | 740 | if (ocfs2_resv_empty(resv)) { |
752 | mlog(0, "empty reservation, find new window\n"); | 741 | /* |
742 | * We don't want to over-allocate for temporary | ||
743 | * windows. Otherwise, we run the risk of fragmenting the | ||
744 | * allocation space. | ||
745 | */ | ||
746 | unsigned int wanted = ocfs2_resv_window_bits(resmap, resv); | ||
753 | 747 | ||
748 | if ((resv->r_flags & OCFS2_RESV_FLAG_TMP) || wanted < *clen) | ||
749 | wanted = *clen; | ||
750 | |||
751 | mlog(0, "empty reservation, find new window\n"); | ||
754 | /* | 752 | /* |
755 | * Try to get a window here. If it works, we must fall | 753 | * Try to get a window here. If it works, we must fall |
756 | * through and test the bitmap. This avoids some | 754 | * through and test the bitmap. This avoids some |
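
The reordering above means the window sizing only happens when the reservation is actually empty and a new window has to be found; an existing window is reused as-is. A userspace mock of the sizing rule, with stand-in names rather than the real ocfs2 structures:

#include <stdio.h>

#define RESV_FLAG_TMP 0x1                 /* stand-in for OCFS2_RESV_FLAG_TMP */

struct mock_resv {
    unsigned int r_flags;
};

/* Stand-in for ocfs2_resv_window_bits(): the preferred window size. */
static unsigned int window_bits(void)
{
    return 2048;
}

/* Sizing rule, applied only when the reservation is empty and a new
 * window is being searched for, mirroring the hunk above. */
static unsigned int bits_wanted(const struct mock_resv *resv, unsigned int clen)
{
    unsigned int wanted = window_bits();

    /* Temporary windows must not over-allocate, and we never ask for
     * fewer bits than the caller needs right now. */
    if ((resv->r_flags & RESV_FLAG_TMP) || wanted < clen)
        wanted = clen;
    return wanted;
}

int main(void)
{
    struct mock_resv tmp = { .r_flags = RESV_FLAG_TMP };
    struct mock_resv normal = { .r_flags = 0 };

    printf("temporary resv, need 8    -> ask for %u\n", bits_wanted(&tmp, 8));
    printf("normal resv,    need 8    -> ask for %u\n", bits_wanted(&normal, 8));
    printf("normal resv,    need 4096 -> ask for %u\n", bits_wanted(&normal, 4096));
    return 0;
}
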
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index 8a286f54dca1..849c2f0e0a0e 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c | |||
@@ -357,7 +357,7 @@ out: | |||
357 | static void ocfs2_bg_discontig_add_extent(struct ocfs2_super *osb, | 357 | static void ocfs2_bg_discontig_add_extent(struct ocfs2_super *osb, |
358 | struct ocfs2_group_desc *bg, | 358 | struct ocfs2_group_desc *bg, |
359 | struct ocfs2_chain_list *cl, | 359 | struct ocfs2_chain_list *cl, |
360 | u64 p_blkno, u32 clusters) | 360 | u64 p_blkno, unsigned int clusters) |
361 | { | 361 | { |
362 | struct ocfs2_extent_list *el = &bg->bg_list; | 362 | struct ocfs2_extent_list *el = &bg->bg_list; |
363 | struct ocfs2_extent_rec *rec; | 363 | struct ocfs2_extent_rec *rec; |
@@ -369,7 +369,7 @@ static void ocfs2_bg_discontig_add_extent(struct ocfs2_super *osb, | |||
369 | rec->e_blkno = cpu_to_le64(p_blkno); | 369 | rec->e_blkno = cpu_to_le64(p_blkno); |
370 | rec->e_cpos = cpu_to_le32(le16_to_cpu(bg->bg_bits) / | 370 | rec->e_cpos = cpu_to_le32(le16_to_cpu(bg->bg_bits) / |
371 | le16_to_cpu(cl->cl_bpc)); | 371 | le16_to_cpu(cl->cl_bpc)); |
372 | rec->e_leaf_clusters = cpu_to_le32(clusters); | 372 | rec->e_leaf_clusters = cpu_to_le16(clusters); |
373 | le16_add_cpu(&bg->bg_bits, clusters * le16_to_cpu(cl->cl_bpc)); | 373 | le16_add_cpu(&bg->bg_bits, clusters * le16_to_cpu(cl->cl_bpc)); |
374 | le16_add_cpu(&bg->bg_free_bits_count, | 374 | le16_add_cpu(&bg->bg_free_bits_count, |
375 | clusters * le16_to_cpu(cl->cl_bpc)); | 375 | clusters * le16_to_cpu(cl->cl_bpc)); |
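
The important fix above is the cpu_to_le32() to cpu_to_le16() change: e_leaf_clusters is a 16-bit little-endian on-disk field, so byte-swapping the value as a 32-bit quantity and then truncating it into the field stores the wrong bytes on big-endian machines (little-endian hosts hide the bug because the swap is a no-op there). A standalone sketch of the failure mode, with simple byte-swap helpers standing in for the kernel macros:

#include <stdio.h>
#include <stdint.h>

/* Simulate what cpu_to_le32()/cpu_to_le16() do on a BIG-endian CPU,
 * where the helpers actually byte-swap. */
static uint32_t bswap32(uint32_t x)
{
    return ((x & 0x000000ffu) << 24) | ((x & 0x0000ff00u) << 8) |
           ((x & 0x00ff0000u) >> 8)  | ((x & 0xff000000u) >> 24);
}

static uint16_t bswap16(uint16_t x)
{
    return (uint16_t)((x >> 8) | (x << 8));
}

int main(void)
{
    uint32_t clusters = 5;

    /* buggy: 32-bit swap, silently truncated into the 16-bit field */
    uint16_t on_disk_bad  = (uint16_t)bswap32(clusters);
    /* fixed: 16-bit swap, as cpu_to_le16() would produce */
    uint16_t on_disk_good = bswap16((uint16_t)clusters);

    printf("cpu_to_le32 then truncate: %#06x (reads back as 0)\n", on_disk_bad);
    printf("cpu_to_le16:               %#06x (reads back as 5)\n", on_disk_good);
    return 0;
}
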
diff --git a/fs/ocfs2/symlink.c b/fs/ocfs2/symlink.c index 32499d213fc4..9975457c981f 100644 --- a/fs/ocfs2/symlink.c +++ b/fs/ocfs2/symlink.c | |||
@@ -128,7 +128,7 @@ static void *ocfs2_fast_follow_link(struct dentry *dentry, | |||
128 | } | 128 | } |
129 | 129 | ||
130 | /* Fast symlinks can't be large */ | 130 | /* Fast symlinks can't be large */ |
131 | len = strlen(target); | 131 | len = strnlen(target, ocfs2_fast_symlink_chars(inode->i_sb)); |
132 | link = kzalloc(len + 1, GFP_NOFS); | 132 | link = kzalloc(len + 1, GFP_NOFS); |
133 | if (!link) { | 133 | if (!link) { |
134 | status = -ENOMEM; | 134 | status = -ENOMEM; |
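
strlen() trusts that the in-inode symlink body is NUL-terminated; a corrupted fast symlink that fills the whole area without a terminator would let the scan run past the on-disk buffer. strnlen() bounds the scan at ocfs2_fast_symlink_chars(). A userspace illustration of the same pattern (the 16-byte area size is arbitrary):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define FAST_SYMLINK_AREA 16      /* stand-in for ocfs2_fast_symlink_chars() */

int main(void)
{
    /* An on-disk symlink body that is NOT NUL-terminated. */
    char raw[FAST_SYMLINK_AREA];
    memset(raw, 'A', sizeof(raw));

    /* strlen(raw) would read past 'raw' here; strnlen() stops at the
     * area size and never walks off the buffer. */
    size_t len = strnlen(raw, sizeof(raw));

    char *link = calloc(1, len + 1);      /* kzalloc(len + 1, ...) analogue */
    if (!link)
        return 1;
    memcpy(link, raw, len);

    printf("len=%zu link=\"%s\"\n", len, link);
    free(link);
    return 0;
}
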
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index d03469f61801..06fa5e77c40e 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c | |||
@@ -1286,13 +1286,11 @@ int ocfs2_xattr_get_nolock(struct inode *inode, | |||
1286 | xis.inode_bh = xbs.inode_bh = di_bh; | 1286 | xis.inode_bh = xbs.inode_bh = di_bh; |
1287 | di = (struct ocfs2_dinode *)di_bh->b_data; | 1287 | di = (struct ocfs2_dinode *)di_bh->b_data; |
1288 | 1288 | ||
1289 | down_read(&oi->ip_xattr_sem); | ||
1290 | ret = ocfs2_xattr_ibody_get(inode, name_index, name, buffer, | 1289 | ret = ocfs2_xattr_ibody_get(inode, name_index, name, buffer, |
1291 | buffer_size, &xis); | 1290 | buffer_size, &xis); |
1292 | if (ret == -ENODATA && di->i_xattr_loc) | 1291 | if (ret == -ENODATA && di->i_xattr_loc) |
1293 | ret = ocfs2_xattr_block_get(inode, name_index, name, buffer, | 1292 | ret = ocfs2_xattr_block_get(inode, name_index, name, buffer, |
1294 | buffer_size, &xbs); | 1293 | buffer_size, &xbs); |
1295 | up_read(&oi->ip_xattr_sem); | ||
1296 | 1294 | ||
1297 | return ret; | 1295 | return ret; |
1298 | } | 1296 | } |
@@ -1316,8 +1314,10 @@ static int ocfs2_xattr_get(struct inode *inode, | |||
1316 | mlog_errno(ret); | 1314 | mlog_errno(ret); |
1317 | return ret; | 1315 | return ret; |
1318 | } | 1316 | } |
1317 | down_read(&OCFS2_I(inode)->ip_xattr_sem); | ||
1319 | ret = ocfs2_xattr_get_nolock(inode, di_bh, name_index, | 1318 | ret = ocfs2_xattr_get_nolock(inode, di_bh, name_index, |
1320 | name, buffer, buffer_size); | 1319 | name, buffer, buffer_size); |
1320 | up_read(&OCFS2_I(inode)->ip_xattr_sem); | ||
1321 | 1321 | ||
1322 | ocfs2_inode_unlock(inode, 0); | 1322 | ocfs2_inode_unlock(inode, 0); |
1323 | 1323 | ||
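
After this hunk, ocfs2_xattr_get_nolock() finally matches its name: it assumes the caller already holds ip_xattr_sem, and the locked wrapper ocfs2_xattr_get() takes the semaphore around it. A small pthread analogue of that calling convention, with invented names, just to show the split:

#include <pthread.h>
#include <stdio.h>
#include <string.h>

static pthread_rwlock_t xattr_sem = PTHREAD_RWLOCK_INITIALIZER;
static char stored_value[64] = "bar";

/* _nolock variant: the caller must already hold xattr_sem for read. */
static int xattr_get_nolock(const char *name, char *buf, size_t len)
{
    if (strcmp(name, "user.foo") != 0)
        return -1;                         /* -ENODATA analogue */
    snprintf(buf, len, "%s", stored_value);
    return 0;
}

/* Locked wrapper: takes the lock around the nolock helper, like
 * ocfs2_xattr_get() now does around ocfs2_xattr_get_nolock(). */
static int xattr_get(const char *name, char *buf, size_t len)
{
    int ret;

    pthread_rwlock_rdlock(&xattr_sem);
    ret = xattr_get_nolock(name, buf, len);
    pthread_rwlock_unlock(&xattr_sem);
    return ret;
}

int main(void)
{
    char buf[64];

    if (xattr_get("user.foo", buf, sizeof(buf)) == 0)
        printf("user.foo = %s\n", buf);
    return 0;
}
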
diff --git a/fs/proc/base.c b/fs/proc/base.c index a1c43e7c8a7b..8e4addaa5424 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c | |||
@@ -2675,7 +2675,7 @@ static const struct pid_entry tgid_base_stuff[] = { | |||
2675 | INF("auxv", S_IRUSR, proc_pid_auxv), | 2675 | INF("auxv", S_IRUSR, proc_pid_auxv), |
2676 | ONE("status", S_IRUGO, proc_pid_status), | 2676 | ONE("status", S_IRUGO, proc_pid_status), |
2677 | ONE("personality", S_IRUSR, proc_pid_personality), | 2677 | ONE("personality", S_IRUSR, proc_pid_personality), |
2678 | INF("limits", S_IRUSR, proc_pid_limits), | 2678 | INF("limits", S_IRUGO, proc_pid_limits), |
2679 | #ifdef CONFIG_SCHED_DEBUG | 2679 | #ifdef CONFIG_SCHED_DEBUG |
2680 | REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), | 2680 | REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), |
2681 | #endif | 2681 | #endif |
@@ -3011,7 +3011,7 @@ static const struct pid_entry tid_base_stuff[] = { | |||
3011 | INF("auxv", S_IRUSR, proc_pid_auxv), | 3011 | INF("auxv", S_IRUSR, proc_pid_auxv), |
3012 | ONE("status", S_IRUGO, proc_pid_status), | 3012 | ONE("status", S_IRUGO, proc_pid_status), |
3013 | ONE("personality", S_IRUSR, proc_pid_personality), | 3013 | ONE("personality", S_IRUSR, proc_pid_personality), |
3014 | INF("limits", S_IRUSR, proc_pid_limits), | 3014 | INF("limits", S_IRUGO, proc_pid_limits), |
3015 | #ifdef CONFIG_SCHED_DEBUG | 3015 | #ifdef CONFIG_SCHED_DEBUG |
3016 | REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), | 3016 | REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), |
3017 | #endif | 3017 | #endif |
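
Relaxing the mode from S_IRUSR to S_IRUGO makes /proc/<pid>/limits readable by everyone, so monitoring tools no longer need to run as the task owner or root to inspect another process's rlimits. A quick userspace check (PID 1 is just an example of a process you normally do not own):

#include <stdio.h>

int main(void)
{
    /* Before this change, opening another user's limits file failed with
     * EACCES unless you owned the task; with S_IRUGO it is readable. */
    FILE *f = fopen("/proc/1/limits", "r");
    char line[256];

    if (!f) {
        perror("fopen /proc/1/limits");
        return 1;
    }
    while (fgets(line, sizeof(line), f))
        fputs(line, stdout);
    fclose(f);
    return 0;
}
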
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 271afc48b9a5..1dbca4e8cc16 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c | |||
@@ -363,13 +363,13 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, | |||
363 | mss->referenced += PAGE_SIZE; | 363 | mss->referenced += PAGE_SIZE; |
364 | mapcount = page_mapcount(page); | 364 | mapcount = page_mapcount(page); |
365 | if (mapcount >= 2) { | 365 | if (mapcount >= 2) { |
366 | if (pte_dirty(ptent)) | 366 | if (pte_dirty(ptent) || PageDirty(page)) |
367 | mss->shared_dirty += PAGE_SIZE; | 367 | mss->shared_dirty += PAGE_SIZE; |
368 | else | 368 | else |
369 | mss->shared_clean += PAGE_SIZE; | 369 | mss->shared_clean += PAGE_SIZE; |
370 | mss->pss += (PAGE_SIZE << PSS_SHIFT) / mapcount; | 370 | mss->pss += (PAGE_SIZE << PSS_SHIFT) / mapcount; |
371 | } else { | 371 | } else { |
372 | if (pte_dirty(ptent)) | 372 | if (pte_dirty(ptent) || PageDirty(page)) |
373 | mss->private_dirty += PAGE_SIZE; | 373 | mss->private_dirty += PAGE_SIZE; |
374 | else | 374 | else |
375 | mss->private_clean += PAGE_SIZE; | 375 | mss->private_clean += PAGE_SIZE; |
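
The smaps change counts a page as dirty when either the PTE dirty bit or the page's own dirty flag is set; previously a page whose PTE had been cleaned while the struct page was still dirty was reported under the *_Clean counters. A compact mock of the per-page accounting, with PAGE_SIZE and the stats structure reduced to stand-ins:

#include <stdio.h>
#include <stdbool.h>

#define PAGE_SIZE 4096UL
#define PSS_SHIFT 12

struct mem_size_stats {                 /* subset of the kernel struct */
    unsigned long shared_clean, shared_dirty;
    unsigned long private_clean, private_dirty;
    unsigned long long pss;
};

/* One page's worth of smaps accounting, mirroring smaps_pte_range(). */
static void account_page(struct mem_size_stats *mss, int mapcount,
                         bool pte_dirty, bool page_dirty)
{
    bool dirty = pte_dirty || page_dirty;   /* the change in this hunk */

    if (mapcount >= 2) {
        if (dirty)
            mss->shared_dirty += PAGE_SIZE;
        else
            mss->shared_clean += PAGE_SIZE;
        mss->pss += ((unsigned long long)PAGE_SIZE << PSS_SHIFT) / mapcount;
    } else {
        if (dirty)
            mss->private_dirty += PAGE_SIZE;
        else
            mss->private_clean += PAGE_SIZE;
        mss->pss += (unsigned long long)PAGE_SIZE << PSS_SHIFT;
    }
}

int main(void)
{
    struct mem_size_stats mss = { 0 };

    account_page(&mss, 1, false, true);     /* PTE clean, page flag dirty */
    printf("private_dirty=%lu private_clean=%lu\n",
           mss.private_dirty, mss.private_clean);
    return 0;
}
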
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index 91c817ff02c3..2367fb3f70bc 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c | |||
@@ -163,7 +163,7 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer, | |||
163 | 163 | ||
164 | static const struct file_operations proc_vmcore_operations = { | 164 | static const struct file_operations proc_vmcore_operations = { |
165 | .read = read_vmcore, | 165 | .read = read_vmcore, |
166 | .llseek = generic_file_llseek, | 166 | .llseek = default_llseek, |
167 | }; | 167 | }; |
168 | 168 | ||
169 | static struct vmcore* __init get_new_element(void) | 169 | static struct vmcore* __init get_new_element(void) |
diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c index f53505de0712..5cbb81e134ac 100644 --- a/fs/reiserfs/ioctl.c +++ b/fs/reiserfs/ioctl.c | |||
@@ -170,6 +170,7 @@ int reiserfs_prepare_write(struct file *f, struct page *page, | |||
170 | int reiserfs_unpack(struct inode *inode, struct file *filp) | 170 | int reiserfs_unpack(struct inode *inode, struct file *filp) |
171 | { | 171 | { |
172 | int retval = 0; | 172 | int retval = 0; |
173 | int depth; | ||
173 | int index; | 174 | int index; |
174 | struct page *page; | 175 | struct page *page; |
175 | struct address_space *mapping; | 176 | struct address_space *mapping; |
@@ -188,8 +189,8 @@ int reiserfs_unpack(struct inode *inode, struct file *filp) | |||
188 | /* we need to make sure nobody is changing the file size beneath | 189 | /* we need to make sure nobody is changing the file size beneath |
189 | ** us | 190 | ** us |
190 | */ | 191 | */ |
191 | mutex_lock(&inode->i_mutex); | 192 | reiserfs_mutex_lock_safe(&inode->i_mutex, inode->i_sb); |
192 | reiserfs_write_lock(inode->i_sb); | 193 | depth = reiserfs_write_lock_once(inode->i_sb); |
193 | 194 | ||
194 | write_from = inode->i_size & (blocksize - 1); | 195 | write_from = inode->i_size & (blocksize - 1); |
195 | /* if we are on a block boundary, we are already unpacked. */ | 196 | /* if we are on a block boundary, we are already unpacked. */ |
@@ -224,6 +225,6 @@ int reiserfs_unpack(struct inode *inode, struct file *filp) | |||
224 | 225 | ||
225 | out: | 226 | out: |
226 | mutex_unlock(&inode->i_mutex); | 227 | mutex_unlock(&inode->i_mutex); |
227 | reiserfs_write_unlock(inode->i_sb); | 228 | reiserfs_write_unlock_once(inode->i_sb, depth); |
228 | return retval; | 229 | return retval; |
229 | } | 230 | } |
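
The reiserfs hunk swaps the plain lock calls for the recursion-aware variants: reiserfs_write_lock_once() returns a depth so the write lock is only taken (and later released) by the outermost caller, and i_mutex is taken through the _safe helper so it is not acquired while the write lock is held. A pthread sketch of the lock-once/unlock-once idiom, offered as an analogue rather than the real kernel implementation:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t write_lock = PTHREAD_MUTEX_INITIALIZER;
static __thread int write_lock_depth;   /* how many times this thread holds it */

/* Take the lock only if this thread does not hold it yet; return the
 * previous depth so the matching unlock knows whether to release. */
static int write_lock_once(void)
{
    int depth = write_lock_depth;

    if (depth == 0)
        pthread_mutex_lock(&write_lock);
    write_lock_depth++;
    return depth;
}

static void write_unlock_once(int depth)
{
    write_lock_depth--;
    if (depth == 0)                     /* this caller was the one that locked */
        pthread_mutex_unlock(&write_lock);
}

static void inner(void)
{
    int depth = write_lock_once();      /* already held: no deadlock, no re-lock */
    printf("inner: depth on entry = %d\n", depth);
    write_unlock_once(depth);
}

int main(void)
{
    int depth = write_lock_once();
    inner();
    write_unlock_once(depth);
    printf("done, lock fully released\n");
    return 0;
}
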
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index d59c4a65d492..81976ffed7d6 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c | |||
@@ -668,14 +668,11 @@ xfs_inode_set_reclaim_tag( | |||
668 | xfs_perag_put(pag); | 668 | xfs_perag_put(pag); |
669 | } | 669 | } |
670 | 670 | ||
671 | void | 671 | STATIC void |
672 | __xfs_inode_clear_reclaim_tag( | 672 | __xfs_inode_clear_reclaim( |
673 | xfs_mount_t *mp, | ||
674 | xfs_perag_t *pag, | 673 | xfs_perag_t *pag, |
675 | xfs_inode_t *ip) | 674 | xfs_inode_t *ip) |
676 | { | 675 | { |
677 | radix_tree_tag_clear(&pag->pag_ici_root, | ||
678 | XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG); | ||
679 | pag->pag_ici_reclaimable--; | 676 | pag->pag_ici_reclaimable--; |
680 | if (!pag->pag_ici_reclaimable) { | 677 | if (!pag->pag_ici_reclaimable) { |
681 | /* clear the reclaim tag from the perag radix tree */ | 678 | /* clear the reclaim tag from the perag radix tree */ |
@@ -689,6 +686,17 @@ __xfs_inode_clear_reclaim_tag( | |||
689 | } | 686 | } |
690 | } | 687 | } |
691 | 688 | ||
689 | void | ||
690 | __xfs_inode_clear_reclaim_tag( | ||
691 | xfs_mount_t *mp, | ||
692 | xfs_perag_t *pag, | ||
693 | xfs_inode_t *ip) | ||
694 | { | ||
695 | radix_tree_tag_clear(&pag->pag_ici_root, | ||
696 | XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG); | ||
697 | __xfs_inode_clear_reclaim(pag, ip); | ||
698 | } | ||
699 | |||
692 | /* | 700 | /* |
693 | * Inodes in different states need to be treated differently, and the return | 701 | * Inodes in different states need to be treated differently, and the return |
694 | * value of xfs_iflush is not sufficient to get this right. The following table | 702 | * value of xfs_iflush is not sufficient to get this right. The following table |
@@ -838,6 +846,7 @@ reclaim: | |||
838 | if (!radix_tree_delete(&pag->pag_ici_root, | 846 | if (!radix_tree_delete(&pag->pag_ici_root, |
839 | XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino))) | 847 | XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino))) |
840 | ASSERT(0); | 848 | ASSERT(0); |
849 | __xfs_inode_clear_reclaim(pag, ip); | ||
841 | write_unlock(&pag->pag_ici_lock); | 850 | write_unlock(&pag->pag_ici_lock); |
842 | 851 | ||
843 | /* | 852 | /* |
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index ed575fb4b495..7e206fc1fa36 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c | |||
@@ -405,9 +405,15 @@ xlog_cil_push( | |||
405 | new_ctx = kmem_zalloc(sizeof(*new_ctx), KM_SLEEP|KM_NOFS); | 405 | new_ctx = kmem_zalloc(sizeof(*new_ctx), KM_SLEEP|KM_NOFS); |
406 | new_ctx->ticket = xlog_cil_ticket_alloc(log); | 406 | new_ctx->ticket = xlog_cil_ticket_alloc(log); |
407 | 407 | ||
408 | /* lock out transaction commit, but don't block on background push */ | 408 | /* |
409 | * Lock out transaction commit, but don't block for background pushes | ||
410 | * unless we are well over the CIL space limit. See the definition of | ||
411 | * XLOG_CIL_HARD_SPACE_LIMIT() for the full explanation of the logic | ||
412 | * used here. | ||
413 | */ | ||
409 | if (!down_write_trylock(&cil->xc_ctx_lock)) { | 414 | if (!down_write_trylock(&cil->xc_ctx_lock)) { |
410 | if (!push_seq) | 415 | if (!push_seq && |
416 | cil->xc_ctx->space_used < XLOG_CIL_HARD_SPACE_LIMIT(log)) | ||
411 | goto out_free_ticket; | 417 | goto out_free_ticket; |
412 | down_write(&cil->xc_ctx_lock); | 418 | down_write(&cil->xc_ctx_lock); |
413 | } | 419 | } |
@@ -422,7 +428,7 @@ xlog_cil_push( | |||
422 | goto out_skip; | 428 | goto out_skip; |
423 | 429 | ||
424 | /* check for a previously pushed sequence */ | 430 | /* check for a previously pushed sequence */ |
425 | if (push_seq < cil->xc_ctx->sequence) | 431 | if (push_seq && push_seq < cil->xc_ctx->sequence) |
426 | goto out_skip; | 432 | goto out_skip; |
427 | 433 | ||
428 | /* | 434 | /* |
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index ced52b98b322..edcdfe01617f 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h | |||
@@ -426,13 +426,13 @@ struct xfs_cil { | |||
426 | }; | 426 | }; |
427 | 427 | ||
428 | /* | 428 | /* |
429 | * The amount of log space we should the CIL to aggregate is difficult to size. | 429 | * The amount of log space we allow the CIL to aggregate is difficult to size. |
430 | * Whatever we chose we have to make we can get a reservation for the log space | 430 | * Whatever we choose, we have to make sure we can get a reservation for the |
431 | * effectively, that it is large enough to capture sufficient relogging to | 431 | * log space effectively, that it is large enough to capture sufficient |
432 | * reduce log buffer IO significantly, but it is not too large for the log or | 432 | * relogging to reduce log buffer IO significantly, but it is not too large for |
433 | * induces too much latency when writing out through the iclogs. We track both | 433 | * the log or induces too much latency when writing out through the iclogs. We |
434 | * space consumed and the number of vectors in the checkpoint context, so we | 434 | * track both space consumed and the number of vectors in the checkpoint |
435 | * need to decide which to use for limiting. | 435 | * context, so we need to decide which to use for limiting. |
436 | * | 436 | * |
437 | * Every log buffer we write out during a push needs a header reserved, which | 437 | * Every log buffer we write out during a push needs a header reserved, which |
438 | * is at least one sector and more for v2 logs. Hence we need a reservation of | 438 | * is at least one sector and more for v2 logs. Hence we need a reservation of |
@@ -459,16 +459,21 @@ struct xfs_cil { | |||
459 | * checkpoint transaction ticket is specific to the checkpoint context, rather | 459 | * checkpoint transaction ticket is specific to the checkpoint context, rather |
460 | * than the CIL itself. | 460 | * than the CIL itself. |
461 | * | 461 | * |
462 | * With dynamic reservations, we can basically make up arbitrary limits for the | 462 | * With dynamic reservations, we can effectively make up arbitrary limits for |
463 | * checkpoint size so long as they don't violate any other size rules. Hence | 463 | * the checkpoint size so long as they don't violate any other size rules. |
464 | * the initial maximum size for the checkpoint transaction will be set to a | 464 | * Recovery imposes a rule that no transaction exceed half the log, so we are |
465 | * quarter of the log or 8MB, which ever is smaller. 8MB is an arbitrary limit | 465 | * limited by that. Furthermore, the log transaction reservation subsystem |
466 | * right now based on the latency of writing out a large amount of data through | 466 | * tries to keep 25% of the log free, so we need to keep below that limit or we |
467 | * the circular iclog buffers. | 467 | * risk running out of free log space to start any new transactions. |
468 | * | ||
469 | * In order to keep background CIL push efficient, we will set a lower | ||
470 | * threshold at which background pushing is attempted without blocking current | ||
471 | * transaction commits. A separate, higher bound defines when CIL pushes are | ||
472 | * enforced to ensure we stay within our maximum checkpoint size bounds. | ||
473 | * This keeps the background push threshold low, yet gives us plenty of space for aggregation on large logs. | ||
468 | */ | 474 | */ |
469 | 475 | #define XLOG_CIL_SPACE_LIMIT(log) (log->l_logsize >> 3) | |
470 | #define XLOG_CIL_SPACE_LIMIT(log) \ | 476 | #define XLOG_CIL_HARD_SPACE_LIMIT(log) (3 * (log->l_logsize >> 4)) |
471 | (min((log->l_logsize >> 2), (8 * 1024 * 1024))) | ||
472 | 477 | ||
473 | /* | 478 | /* |
474 | * The reservation head lsn is not made up of a cycle number and block number. | 479 | * The reservation head lsn is not made up of a cycle number and block number. |
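
The new macros replace the old fixed cap (a quarter of the log, at most 8MB) with two size-relative thresholds: background pushes aim for 1/8 of the log, and the hard limit at 3/16 is where even the xlog_cil_push() fast path above stops backing off and blocks. Evaluating both for a few log sizes makes the relationship concrete; the struct below is a local stand-in that carries just the one field the macros read:

#include <stdio.h>

struct xlog_mock { unsigned long l_logsize; };   /* just enough for the macros */

#define XLOG_CIL_SPACE_LIMIT(log)      ((log)->l_logsize >> 3)        /* 1/8  */
#define XLOG_CIL_HARD_SPACE_LIMIT(log) (3 * ((log)->l_logsize >> 4))  /* 3/16 */

int main(void)
{
    unsigned long sizes_mb[] = { 32, 128, 2048 };

    for (unsigned i = 0; i < sizeof(sizes_mb) / sizeof(sizes_mb[0]); i++) {
        struct xlog_mock log = { .l_logsize = sizes_mb[i] << 20 };

        printf("%4lu MB log: background push at %3lu MB, hard limit %3lu MB\n",
               sizes_mb[i],
               XLOG_CIL_SPACE_LIMIT(&log) >> 20,
               XLOG_CIL_HARD_SPACE_LIMIT(&log) >> 20);
    }
    return 0;
}

This is also why the xfs_log_cil.c hunk lets a background push return early only while space_used is still under XLOG_CIL_HARD_SPACE_LIMIT(): once the hard limit is crossed, the pusher waits for the context lock instead of giving up.
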