aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/aio.c10
-rw-r--r--fs/ceph/Kconfig1
-rw-r--r--fs/ceph/addr.c7
-rw-r--r--fs/ceph/caps.c58
-rw-r--r--fs/ceph/dir.c10
-rw-r--r--fs/ceph/export.c21
-rw-r--r--fs/ceph/file.c2
-rw-r--r--fs/ceph/inode.c11
-rw-r--r--fs/ceph/mds_client.c2
-rw-r--r--fs/ceph/osd_client.c2
-rw-r--r--fs/ceph/pagelist.c12
-rw-r--r--fs/ceph/snap.c92
-rw-r--r--fs/ceph/super.h5
-rw-r--r--fs/char_dev.c4
-rw-r--r--fs/cifs/cifssmb.c49
-rw-r--r--fs/cifs/inode.c2
-rw-r--r--fs/compat.c2
-rw-r--r--fs/exofs/inode.c8
-rw-r--r--fs/fs-writeback.c12
-rw-r--r--fs/fuse/dev.c2
-rw-r--r--fs/nfsd/nfsfh.h2
-rw-r--r--fs/notify/Kconfig2
-rw-r--r--fs/ocfs2/acl.c3
-rw-r--r--fs/ocfs2/cluster/tcp.c2
-rw-r--r--fs/ocfs2/dir.c24
-rw-r--r--fs/ocfs2/dlm/dlmcommon.h1
-rw-r--r--fs/ocfs2/dlm/dlmdebug.c9
-rw-r--r--fs/ocfs2/dlm/dlmdomain.c1
-rw-r--r--fs/ocfs2/dlm/dlmmaster.c40
-rw-r--r--fs/ocfs2/dlmglue.h1
-rw-r--r--fs/ocfs2/ocfs2_fs.h37
-rw-r--r--fs/ocfs2/ocfs2_ioctl.h8
-rw-r--r--fs/ocfs2/refcounttree.c5
-rw-r--r--fs/ocfs2/reservations.c22
-rw-r--r--fs/ocfs2/suballoc.c4
-rw-r--r--fs/ocfs2/symlink.c2
-rw-r--r--fs/ocfs2/xattr.c4
-rw-r--r--fs/proc/base.c4
-rw-r--r--fs/proc/task_mmu.c4
-rw-r--r--fs/proc/vmcore.c2
-rw-r--r--fs/reiserfs/ioctl.c7
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.c19
-rw-r--r--fs/xfs/xfs_log_cil.c12
-rw-r--r--fs/xfs/xfs_log_priv.h37
44 files changed, 355 insertions, 209 deletions
diff --git a/fs/aio.c b/fs/aio.c
index 1320b2a05fb2..250b0a73c8a8 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -712,8 +712,16 @@ static ssize_t aio_run_iocb(struct kiocb *iocb)
712 */ 712 */
713 ret = retry(iocb); 713 ret = retry(iocb);
714 714
715 if (ret != -EIOCBRETRY && ret != -EIOCBQUEUED) 715 if (ret != -EIOCBRETRY && ret != -EIOCBQUEUED) {
716 /*
717 * There's no easy way to restart the syscall since other AIO's
718 * may be already running. Just fail this IO with EINTR.
719 */
720 if (unlikely(ret == -ERESTARTSYS || ret == -ERESTARTNOINTR ||
721 ret == -ERESTARTNOHAND || ret == -ERESTART_RESTARTBLOCK))
722 ret = -EINTR;
716 aio_complete(iocb, ret, 0); 723 aio_complete(iocb, ret, 0);
724 }
717out: 725out:
718 spin_lock_irq(&ctx->ctx_lock); 726 spin_lock_irq(&ctx->ctx_lock);
719 727
diff --git a/fs/ceph/Kconfig b/fs/ceph/Kconfig
index bc87b9c1d27e..0fcd2640c23f 100644
--- a/fs/ceph/Kconfig
+++ b/fs/ceph/Kconfig
@@ -3,6 +3,7 @@ config CEPH_FS
3 depends on INET && EXPERIMENTAL 3 depends on INET && EXPERIMENTAL
4 select LIBCRC32C 4 select LIBCRC32C
5 select CRYPTO_AES 5 select CRYPTO_AES
6 select CRYPTO
6 help 7 help
7 Choose Y or M here to include support for mounting the 8 Choose Y or M here to include support for mounting the
8 experimental Ceph distributed file system. Ceph is an extremely 9 experimental Ceph distributed file system. Ceph is an extremely
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 4cfce1ee31fa..efbc604001c8 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -411,8 +411,8 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
411 if (i_size < page_off + len) 411 if (i_size < page_off + len)
412 len = i_size - page_off; 412 len = i_size - page_off;
413 413
414 dout("writepage %p page %p index %lu on %llu~%u\n", 414 dout("writepage %p page %p index %lu on %llu~%u snapc %p\n",
415 inode, page, page->index, page_off, len); 415 inode, page, page->index, page_off, len, snapc);
416 416
417 writeback_stat = atomic_long_inc_return(&client->writeback_count); 417 writeback_stat = atomic_long_inc_return(&client->writeback_count);
418 if (writeback_stat > 418 if (writeback_stat >
@@ -766,7 +766,8 @@ get_more_pages:
766 /* ok */ 766 /* ok */
767 if (locked_pages == 0) { 767 if (locked_pages == 0) {
768 /* prepare async write request */ 768 /* prepare async write request */
769 offset = page->index << PAGE_CACHE_SHIFT; 769 offset = (unsigned long long)page->index
770 << PAGE_CACHE_SHIFT;
770 len = wsize; 771 len = wsize;
771 req = ceph_osdc_new_request(&client->osdc, 772 req = ceph_osdc_new_request(&client->osdc,
772 &ci->i_layout, 773 &ci->i_layout,
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index a2069b6680ae..5e9da996a151 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -814,7 +814,7 @@ int __ceph_caps_used(struct ceph_inode_info *ci)
814 used |= CEPH_CAP_PIN; 814 used |= CEPH_CAP_PIN;
815 if (ci->i_rd_ref) 815 if (ci->i_rd_ref)
816 used |= CEPH_CAP_FILE_RD; 816 used |= CEPH_CAP_FILE_RD;
817 if (ci->i_rdcache_ref || ci->i_rdcache_gen) 817 if (ci->i_rdcache_ref || ci->vfs_inode.i_data.nrpages)
818 used |= CEPH_CAP_FILE_CACHE; 818 used |= CEPH_CAP_FILE_CACHE;
819 if (ci->i_wr_ref) 819 if (ci->i_wr_ref)
820 used |= CEPH_CAP_FILE_WR; 820 used |= CEPH_CAP_FILE_WR;
@@ -1195,10 +1195,14 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
1195 * asynchronously back to the MDS once sync writes complete and dirty 1195 * asynchronously back to the MDS once sync writes complete and dirty
1196 * data is written out. 1196 * data is written out.
1197 * 1197 *
1198 * Unless @again is true, skip cap_snaps that were already sent to
1199 * the MDS (i.e., during this session).
1200 *
1198 * Called under i_lock. Takes s_mutex as needed. 1201 * Called under i_lock. Takes s_mutex as needed.
1199 */ 1202 */
1200void __ceph_flush_snaps(struct ceph_inode_info *ci, 1203void __ceph_flush_snaps(struct ceph_inode_info *ci,
1201 struct ceph_mds_session **psession) 1204 struct ceph_mds_session **psession,
1205 int again)
1202 __releases(ci->vfs_inode->i_lock) 1206 __releases(ci->vfs_inode->i_lock)
1203 __acquires(ci->vfs_inode->i_lock) 1207 __acquires(ci->vfs_inode->i_lock)
1204{ 1208{
@@ -1227,7 +1231,7 @@ retry:
1227 * pages to be written out. 1231 * pages to be written out.
1228 */ 1232 */
1229 if (capsnap->dirty_pages || capsnap->writing) 1233 if (capsnap->dirty_pages || capsnap->writing)
1230 continue; 1234 break;
1231 1235
1232 /* 1236 /*
1233 * if cap writeback already occurred, we should have dropped 1237 * if cap writeback already occurred, we should have dropped
@@ -1240,6 +1244,13 @@ retry:
1240 dout("no auth cap (migrating?), doing nothing\n"); 1244 dout("no auth cap (migrating?), doing nothing\n");
1241 goto out; 1245 goto out;
1242 } 1246 }
1247
1248 /* only flush each capsnap once */
1249 if (!again && !list_empty(&capsnap->flushing_item)) {
1250 dout("already flushed %p, skipping\n", capsnap);
1251 continue;
1252 }
1253
1243 mds = ci->i_auth_cap->session->s_mds; 1254 mds = ci->i_auth_cap->session->s_mds;
1244 mseq = ci->i_auth_cap->mseq; 1255 mseq = ci->i_auth_cap->mseq;
1245 1256
@@ -1276,8 +1287,8 @@ retry:
1276 &session->s_cap_snaps_flushing); 1287 &session->s_cap_snaps_flushing);
1277 spin_unlock(&inode->i_lock); 1288 spin_unlock(&inode->i_lock);
1278 1289
1279 dout("flush_snaps %p cap_snap %p follows %lld size %llu\n", 1290 dout("flush_snaps %p cap_snap %p follows %lld tid %llu\n",
1280 inode, capsnap, next_follows, capsnap->size); 1291 inode, capsnap, capsnap->follows, capsnap->flush_tid);
1281 send_cap_msg(session, ceph_vino(inode).ino, 0, 1292 send_cap_msg(session, ceph_vino(inode).ino, 0,
1282 CEPH_CAP_OP_FLUSHSNAP, capsnap->issued, 0, 1293 CEPH_CAP_OP_FLUSHSNAP, capsnap->issued, 0,
1283 capsnap->dirty, 0, capsnap->flush_tid, 0, mseq, 1294 capsnap->dirty, 0, capsnap->flush_tid, 0, mseq,
@@ -1314,7 +1325,7 @@ static void ceph_flush_snaps(struct ceph_inode_info *ci)
1314 struct inode *inode = &ci->vfs_inode; 1325 struct inode *inode = &ci->vfs_inode;
1315 1326
1316 spin_lock(&inode->i_lock); 1327 spin_lock(&inode->i_lock);
1317 __ceph_flush_snaps(ci, NULL); 1328 __ceph_flush_snaps(ci, NULL, 0);
1318 spin_unlock(&inode->i_lock); 1329 spin_unlock(&inode->i_lock);
1319} 1330}
1320 1331
@@ -1477,7 +1488,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
1477 1488
1478 /* flush snaps first time around only */ 1489 /* flush snaps first time around only */
1479 if (!list_empty(&ci->i_cap_snaps)) 1490 if (!list_empty(&ci->i_cap_snaps))
1480 __ceph_flush_snaps(ci, &session); 1491 __ceph_flush_snaps(ci, &session, 0);
1481 goto retry_locked; 1492 goto retry_locked;
1482retry: 1493retry:
1483 spin_lock(&inode->i_lock); 1494 spin_lock(&inode->i_lock);
@@ -1894,7 +1905,7 @@ static void kick_flushing_capsnaps(struct ceph_mds_client *mdsc,
1894 if (cap && cap->session == session) { 1905 if (cap && cap->session == session) {
1895 dout("kick_flushing_caps %p cap %p capsnap %p\n", inode, 1906 dout("kick_flushing_caps %p cap %p capsnap %p\n", inode,
1896 cap, capsnap); 1907 cap, capsnap);
1897 __ceph_flush_snaps(ci, &session); 1908 __ceph_flush_snaps(ci, &session, 1);
1898 } else { 1909 } else {
1899 pr_err("%p auth cap %p not mds%d ???\n", inode, 1910 pr_err("%p auth cap %p not mds%d ???\n", inode,
1900 cap, session->s_mds); 1911 cap, session->s_mds);
@@ -2272,7 +2283,8 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
2272{ 2283{
2273 struct ceph_inode_info *ci = ceph_inode(inode); 2284 struct ceph_inode_info *ci = ceph_inode(inode);
2274 int mds = session->s_mds; 2285 int mds = session->s_mds;
2275 int seq = le32_to_cpu(grant->seq); 2286 unsigned seq = le32_to_cpu(grant->seq);
2287 unsigned issue_seq = le32_to_cpu(grant->issue_seq);
2276 int newcaps = le32_to_cpu(grant->caps); 2288 int newcaps = le32_to_cpu(grant->caps);
2277 int issued, implemented, used, wanted, dirty; 2289 int issued, implemented, used, wanted, dirty;
2278 u64 size = le64_to_cpu(grant->size); 2290 u64 size = le64_to_cpu(grant->size);
@@ -2284,8 +2296,8 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
2284 int revoked_rdcache = 0; 2296 int revoked_rdcache = 0;
2285 int queue_invalidate = 0; 2297 int queue_invalidate = 0;
2286 2298
2287 dout("handle_cap_grant inode %p cap %p mds%d seq %d %s\n", 2299 dout("handle_cap_grant inode %p cap %p mds%d seq %u/%u %s\n",
2288 inode, cap, mds, seq, ceph_cap_string(newcaps)); 2300 inode, cap, mds, seq, issue_seq, ceph_cap_string(newcaps));
2289 dout(" size %llu max_size %llu, i_size %llu\n", size, max_size, 2301 dout(" size %llu max_size %llu, i_size %llu\n", size, max_size,
2290 inode->i_size); 2302 inode->i_size);
2291 2303
@@ -2381,6 +2393,7 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
2381 } 2393 }
2382 2394
2383 cap->seq = seq; 2395 cap->seq = seq;
2396 cap->issue_seq = issue_seq;
2384 2397
2385 /* file layout may have changed */ 2398 /* file layout may have changed */
2386 ci->i_layout = grant->layout; 2399 ci->i_layout = grant->layout;
@@ -2763,15 +2776,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
2763 if (op == CEPH_CAP_OP_IMPORT) 2776 if (op == CEPH_CAP_OP_IMPORT)
2764 __queue_cap_release(session, vino.ino, cap_id, 2777 __queue_cap_release(session, vino.ino, cap_id,
2765 mseq, seq); 2778 mseq, seq);
2766 2779 goto flush_cap_releases;
2767 /*
2768 * send any full release message to try to move things
2769 * along for the mds (who clearly thinks we still have this
2770 * cap).
2771 */
2772 ceph_add_cap_releases(mdsc, session);
2773 ceph_send_cap_releases(mdsc, session);
2774 goto done;
2775 } 2780 }
2776 2781
2777 /* these will work even if we don't have a cap yet */ 2782 /* these will work even if we don't have a cap yet */
@@ -2799,7 +2804,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
2799 dout(" no cap on %p ino %llx.%llx from mds%d\n", 2804 dout(" no cap on %p ino %llx.%llx from mds%d\n",
2800 inode, ceph_ino(inode), ceph_snap(inode), mds); 2805 inode, ceph_ino(inode), ceph_snap(inode), mds);
2801 spin_unlock(&inode->i_lock); 2806 spin_unlock(&inode->i_lock);
2802 goto done; 2807 goto flush_cap_releases;
2803 } 2808 }
2804 2809
2805 /* note that each of these drops i_lock for us */ 2810 /* note that each of these drops i_lock for us */
@@ -2823,6 +2828,17 @@ void ceph_handle_caps(struct ceph_mds_session *session,
2823 ceph_cap_op_name(op)); 2828 ceph_cap_op_name(op));
2824 } 2829 }
2825 2830
2831 goto done;
2832
2833flush_cap_releases:
2834 /*
2835 * send any full release message to try to move things
2836 * along for the mds (who clearly thinks we still have this
2837 * cap).
2838 */
2839 ceph_add_cap_releases(mdsc, session);
2840 ceph_send_cap_releases(mdsc, session);
2841
2826done: 2842done:
2827 mutex_unlock(&session->s_mutex); 2843 mutex_unlock(&session->s_mutex);
2828done_unlocked: 2844done_unlocked:
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 6e4f43ff23ec..a1986eb52045 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -1021,11 +1021,15 @@ out_touch:
1021static void ceph_dentry_release(struct dentry *dentry) 1021static void ceph_dentry_release(struct dentry *dentry)
1022{ 1022{
1023 struct ceph_dentry_info *di = ceph_dentry(dentry); 1023 struct ceph_dentry_info *di = ceph_dentry(dentry);
1024 struct inode *parent_inode = dentry->d_parent->d_inode; 1024 struct inode *parent_inode = NULL;
1025 u64 snapid = ceph_snap(parent_inode); 1025 u64 snapid = CEPH_NOSNAP;
1026 1026
1027 if (!IS_ROOT(dentry)) {
1028 parent_inode = dentry->d_parent->d_inode;
1029 if (parent_inode)
1030 snapid = ceph_snap(parent_inode);
1031 }
1027 dout("dentry_release %p parent %p\n", dentry, parent_inode); 1032 dout("dentry_release %p parent %p\n", dentry, parent_inode);
1028
1029 if (parent_inode && snapid != CEPH_SNAPDIR) { 1033 if (parent_inode && snapid != CEPH_SNAPDIR) {
1030 struct ceph_inode_info *ci = ceph_inode(parent_inode); 1034 struct ceph_inode_info *ci = ceph_inode(parent_inode);
1031 1035
diff --git a/fs/ceph/export.c b/fs/ceph/export.c
index 4480cb1c63e7..e38423e82f2e 100644
--- a/fs/ceph/export.c
+++ b/fs/ceph/export.c
@@ -42,32 +42,37 @@ struct ceph_nfs_confh {
42static int ceph_encode_fh(struct dentry *dentry, u32 *rawfh, int *max_len, 42static int ceph_encode_fh(struct dentry *dentry, u32 *rawfh, int *max_len,
43 int connectable) 43 int connectable)
44{ 44{
45 int type;
45 struct ceph_nfs_fh *fh = (void *)rawfh; 46 struct ceph_nfs_fh *fh = (void *)rawfh;
46 struct ceph_nfs_confh *cfh = (void *)rawfh; 47 struct ceph_nfs_confh *cfh = (void *)rawfh;
47 struct dentry *parent = dentry->d_parent; 48 struct dentry *parent = dentry->d_parent;
48 struct inode *inode = dentry->d_inode; 49 struct inode *inode = dentry->d_inode;
49 int type; 50 int connected_handle_length = sizeof(*cfh)/4;
51 int handle_length = sizeof(*fh)/4;
50 52
51 /* don't re-export snaps */ 53 /* don't re-export snaps */
52 if (ceph_snap(inode) != CEPH_NOSNAP) 54 if (ceph_snap(inode) != CEPH_NOSNAP)
53 return -EINVAL; 55 return -EINVAL;
54 56
55 if (*max_len >= sizeof(*cfh)) { 57 if (*max_len >= connected_handle_length) {
56 dout("encode_fh %p connectable\n", dentry); 58 dout("encode_fh %p connectable\n", dentry);
57 cfh->ino = ceph_ino(dentry->d_inode); 59 cfh->ino = ceph_ino(dentry->d_inode);
58 cfh->parent_ino = ceph_ino(parent->d_inode); 60 cfh->parent_ino = ceph_ino(parent->d_inode);
59 cfh->parent_name_hash = parent->d_name.hash; 61 cfh->parent_name_hash = parent->d_name.hash;
60 *max_len = sizeof(*cfh); 62 *max_len = connected_handle_length;
61 type = 2; 63 type = 2;
62 } else if (*max_len > sizeof(*fh)) { 64 } else if (*max_len >= handle_length) {
63 if (connectable) 65 if (connectable) {
64 return -ENOSPC; 66 *max_len = connected_handle_length;
67 return 255;
68 }
65 dout("encode_fh %p\n", dentry); 69 dout("encode_fh %p\n", dentry);
66 fh->ino = ceph_ino(dentry->d_inode); 70 fh->ino = ceph_ino(dentry->d_inode);
67 *max_len = sizeof(*fh); 71 *max_len = handle_length;
68 type = 1; 72 type = 1;
69 } else { 73 } else {
70 return -ENOSPC; 74 *max_len = handle_length;
75 return 255;
71 } 76 }
72 return type; 77 return type;
73} 78}
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 8c044a4f0457..66e4da6dba22 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -697,7 +697,7 @@ more:
697 * start_request so that a tid has been assigned. 697 * start_request so that a tid has been assigned.
698 */ 698 */
699 spin_lock(&ci->i_unsafe_lock); 699 spin_lock(&ci->i_unsafe_lock);
700 list_add(&ci->i_unsafe_writes, &req->r_unsafe_item); 700 list_add(&req->r_unsafe_item, &ci->i_unsafe_writes);
701 spin_unlock(&ci->i_unsafe_lock); 701 spin_unlock(&ci->i_unsafe_lock);
702 ceph_get_cap_refs(ci, CEPH_CAP_FILE_WR); 702 ceph_get_cap_refs(ci, CEPH_CAP_FILE_WR);
703 } 703 }
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index e7cca414da03..62377ec37edf 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -845,7 +845,7 @@ static void ceph_set_dentry_offset(struct dentry *dn)
845 * the caller) if we fail. 845 * the caller) if we fail.
846 */ 846 */
847static struct dentry *splice_dentry(struct dentry *dn, struct inode *in, 847static struct dentry *splice_dentry(struct dentry *dn, struct inode *in,
848 bool *prehash) 848 bool *prehash, bool set_offset)
849{ 849{
850 struct dentry *realdn; 850 struct dentry *realdn;
851 851
@@ -877,7 +877,8 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in,
877 } 877 }
878 if ((!prehash || *prehash) && d_unhashed(dn)) 878 if ((!prehash || *prehash) && d_unhashed(dn))
879 d_rehash(dn); 879 d_rehash(dn);
880 ceph_set_dentry_offset(dn); 880 if (set_offset)
881 ceph_set_dentry_offset(dn);
881out: 882out:
882 return dn; 883 return dn;
883} 884}
@@ -1062,7 +1063,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
1062 d_delete(dn); 1063 d_delete(dn);
1063 goto done; 1064 goto done;
1064 } 1065 }
1065 dn = splice_dentry(dn, in, &have_lease); 1066 dn = splice_dentry(dn, in, &have_lease, true);
1066 if (IS_ERR(dn)) { 1067 if (IS_ERR(dn)) {
1067 err = PTR_ERR(dn); 1068 err = PTR_ERR(dn);
1068 goto done; 1069 goto done;
@@ -1105,7 +1106,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
1105 goto done; 1106 goto done;
1106 } 1107 }
1107 dout(" linking snapped dir %p to dn %p\n", in, dn); 1108 dout(" linking snapped dir %p to dn %p\n", in, dn);
1108 dn = splice_dentry(dn, in, NULL); 1109 dn = splice_dentry(dn, in, NULL, true);
1109 if (IS_ERR(dn)) { 1110 if (IS_ERR(dn)) {
1110 err = PTR_ERR(dn); 1111 err = PTR_ERR(dn);
1111 goto done; 1112 goto done;
@@ -1237,7 +1238,7 @@ retry_lookup:
1237 err = PTR_ERR(in); 1238 err = PTR_ERR(in);
1238 goto out; 1239 goto out;
1239 } 1240 }
1240 dn = splice_dentry(dn, in, NULL); 1241 dn = splice_dentry(dn, in, NULL, false);
1241 if (IS_ERR(dn)) 1242 if (IS_ERR(dn))
1242 dn = NULL; 1243 dn = NULL;
1243 } 1244 }
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index f091b1351786..fad95f8f2608 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -2374,6 +2374,8 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
2374 num_fcntl_locks, 2374 num_fcntl_locks,
2375 num_flock_locks); 2375 num_flock_locks);
2376 unlock_kernel(); 2376 unlock_kernel();
2377 } else {
2378 err = ceph_pagelist_append(pagelist, &rec, reclen);
2377 } 2379 }
2378 2380
2379out_free: 2381out_free:
diff --git a/fs/ceph/osd_client.c b/fs/ceph/osd_client.c
index dfced1dacbcd..3b5571b8ce22 100644
--- a/fs/ceph/osd_client.c
+++ b/fs/ceph/osd_client.c
@@ -549,7 +549,7 @@ static void __unregister_request(struct ceph_osd_client *osdc,
549 */ 549 */
550static void __cancel_request(struct ceph_osd_request *req) 550static void __cancel_request(struct ceph_osd_request *req)
551{ 551{
552 if (req->r_sent) { 552 if (req->r_sent && req->r_osd) {
553 ceph_con_revoke(&req->r_osd->o_con, req->r_request); 553 ceph_con_revoke(&req->r_osd->o_con, req->r_request);
554 req->r_sent = 0; 554 req->r_sent = 0;
555 } 555 }
diff --git a/fs/ceph/pagelist.c b/fs/ceph/pagelist.c
index b6859f47d364..46a368b6dce5 100644
--- a/fs/ceph/pagelist.c
+++ b/fs/ceph/pagelist.c
@@ -5,10 +5,18 @@
5 5
6#include "pagelist.h" 6#include "pagelist.h"
7 7
8static void ceph_pagelist_unmap_tail(struct ceph_pagelist *pl)
9{
10 struct page *page = list_entry(pl->head.prev, struct page,
11 lru);
12 kunmap(page);
13}
14
8int ceph_pagelist_release(struct ceph_pagelist *pl) 15int ceph_pagelist_release(struct ceph_pagelist *pl)
9{ 16{
10 if (pl->mapped_tail) 17 if (pl->mapped_tail)
11 kunmap(pl->mapped_tail); 18 ceph_pagelist_unmap_tail(pl);
19
12 while (!list_empty(&pl->head)) { 20 while (!list_empty(&pl->head)) {
13 struct page *page = list_first_entry(&pl->head, struct page, 21 struct page *page = list_first_entry(&pl->head, struct page,
14 lru); 22 lru);
@@ -26,7 +34,7 @@ static int ceph_pagelist_addpage(struct ceph_pagelist *pl)
26 pl->room += PAGE_SIZE; 34 pl->room += PAGE_SIZE;
27 list_add_tail(&page->lru, &pl->head); 35 list_add_tail(&page->lru, &pl->head);
28 if (pl->mapped_tail) 36 if (pl->mapped_tail)
29 kunmap(pl->mapped_tail); 37 ceph_pagelist_unmap_tail(pl);
30 pl->mapped_tail = kmap(page); 38 pl->mapped_tail = kmap(page);
31 return 0; 39 return 0;
32} 40}
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index 4868b9dcac5a..190b6c4a6f2b 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -119,6 +119,7 @@ static struct ceph_snap_realm *ceph_create_snap_realm(
119 INIT_LIST_HEAD(&realm->children); 119 INIT_LIST_HEAD(&realm->children);
120 INIT_LIST_HEAD(&realm->child_item); 120 INIT_LIST_HEAD(&realm->child_item);
121 INIT_LIST_HEAD(&realm->empty_item); 121 INIT_LIST_HEAD(&realm->empty_item);
122 INIT_LIST_HEAD(&realm->dirty_item);
122 INIT_LIST_HEAD(&realm->inodes_with_caps); 123 INIT_LIST_HEAD(&realm->inodes_with_caps);
123 spin_lock_init(&realm->inodes_with_caps_lock); 124 spin_lock_init(&realm->inodes_with_caps_lock);
124 __insert_snap_realm(&mdsc->snap_realms, realm); 125 __insert_snap_realm(&mdsc->snap_realms, realm);
@@ -467,7 +468,7 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
467 INIT_LIST_HEAD(&capsnap->ci_item); 468 INIT_LIST_HEAD(&capsnap->ci_item);
468 INIT_LIST_HEAD(&capsnap->flushing_item); 469 INIT_LIST_HEAD(&capsnap->flushing_item);
469 470
470 capsnap->follows = snapc->seq - 1; 471 capsnap->follows = snapc->seq;
471 capsnap->issued = __ceph_caps_issued(ci, NULL); 472 capsnap->issued = __ceph_caps_issued(ci, NULL);
472 capsnap->dirty = dirty; 473 capsnap->dirty = dirty;
473 474
@@ -604,6 +605,7 @@ int ceph_update_snap_trace(struct ceph_mds_client *mdsc,
604 struct ceph_snap_realm *realm; 605 struct ceph_snap_realm *realm;
605 int invalidate = 0; 606 int invalidate = 0;
606 int err = -ENOMEM; 607 int err = -ENOMEM;
608 LIST_HEAD(dirty_realms);
607 609
608 dout("update_snap_trace deletion=%d\n", deletion); 610 dout("update_snap_trace deletion=%d\n", deletion);
609more: 611more:
@@ -626,24 +628,6 @@ more:
626 } 628 }
627 } 629 }
628 630
629 if (le64_to_cpu(ri->seq) > realm->seq) {
630 dout("update_snap_trace updating %llx %p %lld -> %lld\n",
631 realm->ino, realm, realm->seq, le64_to_cpu(ri->seq));
632 /*
633 * if the realm seq has changed, queue a cap_snap for every
634 * inode with open caps. we do this _before_ we update
635 * the realm info so that we prepare for writeback under the
636 * _previous_ snap context.
637 *
638 * ...unless it's a snap deletion!
639 */
640 if (!deletion)
641 queue_realm_cap_snaps(realm);
642 } else {
643 dout("update_snap_trace %llx %p seq %lld unchanged\n",
644 realm->ino, realm, realm->seq);
645 }
646
647 /* ensure the parent is correct */ 631 /* ensure the parent is correct */
648 err = adjust_snap_realm_parent(mdsc, realm, le64_to_cpu(ri->parent)); 632 err = adjust_snap_realm_parent(mdsc, realm, le64_to_cpu(ri->parent));
649 if (err < 0) 633 if (err < 0)
@@ -651,6 +635,8 @@ more:
651 invalidate += err; 635 invalidate += err;
652 636
653 if (le64_to_cpu(ri->seq) > realm->seq) { 637 if (le64_to_cpu(ri->seq) > realm->seq) {
638 dout("update_snap_trace updating %llx %p %lld -> %lld\n",
639 realm->ino, realm, realm->seq, le64_to_cpu(ri->seq));
654 /* update realm parameters, snap lists */ 640 /* update realm parameters, snap lists */
655 realm->seq = le64_to_cpu(ri->seq); 641 realm->seq = le64_to_cpu(ri->seq);
656 realm->created = le64_to_cpu(ri->created); 642 realm->created = le64_to_cpu(ri->created);
@@ -668,9 +654,17 @@ more:
668 if (err < 0) 654 if (err < 0)
669 goto fail; 655 goto fail;
670 656
657 /* queue realm for cap_snap creation */
658 list_add(&realm->dirty_item, &dirty_realms);
659
671 invalidate = 1; 660 invalidate = 1;
672 } else if (!realm->cached_context) { 661 } else if (!realm->cached_context) {
662 dout("update_snap_trace %llx %p seq %lld new\n",
663 realm->ino, realm, realm->seq);
673 invalidate = 1; 664 invalidate = 1;
665 } else {
666 dout("update_snap_trace %llx %p seq %lld unchanged\n",
667 realm->ino, realm, realm->seq);
674 } 668 }
675 669
676 dout("done with %llx %p, invalidated=%d, %p %p\n", realm->ino, 670 dout("done with %llx %p, invalidated=%d, %p %p\n", realm->ino,
@@ -683,6 +677,14 @@ more:
683 if (invalidate) 677 if (invalidate)
684 rebuild_snap_realms(realm); 678 rebuild_snap_realms(realm);
685 679
680 /*
681 * queue cap snaps _after_ we've built the new snap contexts,
682 * so that i_head_snapc can be set appropriately.
683 */
684 list_for_each_entry(realm, &dirty_realms, dirty_item) {
685 queue_realm_cap_snaps(realm);
686 }
687
686 __cleanup_empty_realms(mdsc); 688 __cleanup_empty_realms(mdsc);
687 return 0; 689 return 0;
688 690
@@ -715,7 +717,7 @@ static void flush_snaps(struct ceph_mds_client *mdsc)
715 igrab(inode); 717 igrab(inode);
716 spin_unlock(&mdsc->snap_flush_lock); 718 spin_unlock(&mdsc->snap_flush_lock);
717 spin_lock(&inode->i_lock); 719 spin_lock(&inode->i_lock);
718 __ceph_flush_snaps(ci, &session); 720 __ceph_flush_snaps(ci, &session, 0);
719 spin_unlock(&inode->i_lock); 721 spin_unlock(&inode->i_lock);
720 iput(inode); 722 iput(inode);
721 spin_lock(&mdsc->snap_flush_lock); 723 spin_lock(&mdsc->snap_flush_lock);
@@ -816,6 +818,7 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
816 }; 818 };
817 struct inode *inode = ceph_find_inode(sb, vino); 819 struct inode *inode = ceph_find_inode(sb, vino);
818 struct ceph_inode_info *ci; 820 struct ceph_inode_info *ci;
821 struct ceph_snap_realm *oldrealm;
819 822
820 if (!inode) 823 if (!inode)
821 continue; 824 continue;
@@ -841,18 +844,19 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
841 dout(" will move %p to split realm %llx %p\n", 844 dout(" will move %p to split realm %llx %p\n",
842 inode, realm->ino, realm); 845 inode, realm->ino, realm);
843 /* 846 /*
844 * Remove the inode from the realm's inode 847 * Move the inode to the new realm
845 * list, but don't add it to the new realm
846 * yet. We don't want the cap_snap to be
847 * queued (again) by ceph_update_snap_trace()
848 * below. Queue it _now_, under the old context.
849 */ 848 */
850 spin_lock(&realm->inodes_with_caps_lock); 849 spin_lock(&realm->inodes_with_caps_lock);
851 list_del_init(&ci->i_snap_realm_item); 850 list_del_init(&ci->i_snap_realm_item);
851 list_add(&ci->i_snap_realm_item,
852 &realm->inodes_with_caps);
853 oldrealm = ci->i_snap_realm;
854 ci->i_snap_realm = realm;
852 spin_unlock(&realm->inodes_with_caps_lock); 855 spin_unlock(&realm->inodes_with_caps_lock);
853 spin_unlock(&inode->i_lock); 856 spin_unlock(&inode->i_lock);
854 857
855 ceph_queue_cap_snap(ci); 858 ceph_get_snap_realm(mdsc, realm);
859 ceph_put_snap_realm(mdsc, oldrealm);
856 860
857 iput(inode); 861 iput(inode);
858 continue; 862 continue;
@@ -880,43 +884,9 @@ skip_inode:
880 ceph_update_snap_trace(mdsc, p, e, 884 ceph_update_snap_trace(mdsc, p, e,
881 op == CEPH_SNAP_OP_DESTROY); 885 op == CEPH_SNAP_OP_DESTROY);
882 886
883 if (op == CEPH_SNAP_OP_SPLIT) { 887 if (op == CEPH_SNAP_OP_SPLIT)
884 /*
885 * ok, _now_ add the inodes into the new realm.
886 */
887 for (i = 0; i < num_split_inos; i++) {
888 struct ceph_vino vino = {
889 .ino = le64_to_cpu(split_inos[i]),
890 .snap = CEPH_NOSNAP,
891 };
892 struct inode *inode = ceph_find_inode(sb, vino);
893 struct ceph_inode_info *ci;
894
895 if (!inode)
896 continue;
897 ci = ceph_inode(inode);
898 spin_lock(&inode->i_lock);
899 if (list_empty(&ci->i_snap_realm_item)) {
900 struct ceph_snap_realm *oldrealm =
901 ci->i_snap_realm;
902
903 dout(" moving %p to split realm %llx %p\n",
904 inode, realm->ino, realm);
905 spin_lock(&realm->inodes_with_caps_lock);
906 list_add(&ci->i_snap_realm_item,
907 &realm->inodes_with_caps);
908 ci->i_snap_realm = realm;
909 spin_unlock(&realm->inodes_with_caps_lock);
910 ceph_get_snap_realm(mdsc, realm);
911 ceph_put_snap_realm(mdsc, oldrealm);
912 }
913 spin_unlock(&inode->i_lock);
914 iput(inode);
915 }
916
917 /* we took a reference when we created the realm, above */ 888 /* we took a reference when we created the realm, above */
918 ceph_put_snap_realm(mdsc, realm); 889 ceph_put_snap_realm(mdsc, realm);
919 }
920 890
921 __cleanup_empty_realms(mdsc); 891 __cleanup_empty_realms(mdsc);
922 892
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index c33897ae5725..b87638e84c4b 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -690,6 +690,8 @@ struct ceph_snap_realm {
690 690
691 struct list_head empty_item; /* if i have ref==0 */ 691 struct list_head empty_item; /* if i have ref==0 */
692 692
693 struct list_head dirty_item; /* if realm needs new context */
694
693 /* the current set of snaps for this realm */ 695 /* the current set of snaps for this realm */
694 struct ceph_snap_context *cached_context; 696 struct ceph_snap_context *cached_context;
695 697
@@ -826,7 +828,8 @@ extern void ceph_put_cap_refs(struct ceph_inode_info *ci, int had);
826extern void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr, 828extern void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
827 struct ceph_snap_context *snapc); 829 struct ceph_snap_context *snapc);
828extern void __ceph_flush_snaps(struct ceph_inode_info *ci, 830extern void __ceph_flush_snaps(struct ceph_inode_info *ci,
829 struct ceph_mds_session **psession); 831 struct ceph_mds_session **psession,
832 int again);
830extern void ceph_check_caps(struct ceph_inode_info *ci, int flags, 833extern void ceph_check_caps(struct ceph_inode_info *ci, int flags,
831 struct ceph_mds_session *session); 834 struct ceph_mds_session *session);
832extern void ceph_check_delayed_caps(struct ceph_mds_client *mdsc); 835extern void ceph_check_delayed_caps(struct ceph_mds_client *mdsc);
diff --git a/fs/char_dev.c b/fs/char_dev.c
index f80a4f25123c..143d393881cb 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -40,7 +40,9 @@ struct backing_dev_info directly_mappable_cdev_bdi = {
40#endif 40#endif
41 /* permit direct mmap, for read, write or exec */ 41 /* permit direct mmap, for read, write or exec */
42 BDI_CAP_MAP_DIRECT | 42 BDI_CAP_MAP_DIRECT |
43 BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP), 43 BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP |
44 /* no writeback happens */
45 BDI_CAP_NO_ACCT_AND_WRITEBACK),
44}; 46};
45 47
46static struct kobj_map *cdev_map; 48static struct kobj_map *cdev_map;
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index c65c3419dd37..7e83b356cc9e 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -232,7 +232,7 @@ static int
232small_smb_init(int smb_command, int wct, struct cifsTconInfo *tcon, 232small_smb_init(int smb_command, int wct, struct cifsTconInfo *tcon,
233 void **request_buf) 233 void **request_buf)
234{ 234{
235 int rc = 0; 235 int rc;
236 236
237 rc = cifs_reconnect_tcon(tcon, smb_command); 237 rc = cifs_reconnect_tcon(tcon, smb_command);
238 if (rc) 238 if (rc)
@@ -250,7 +250,7 @@ small_smb_init(int smb_command, int wct, struct cifsTconInfo *tcon,
250 if (tcon != NULL) 250 if (tcon != NULL)
251 cifs_stats_inc(&tcon->num_smbs_sent); 251 cifs_stats_inc(&tcon->num_smbs_sent);
252 252
253 return rc; 253 return 0;
254} 254}
255 255
256int 256int
@@ -281,16 +281,9 @@ small_smb_init_no_tc(const int smb_command, const int wct,
281 281
282/* If the return code is zero, this function must fill in request_buf pointer */ 282/* If the return code is zero, this function must fill in request_buf pointer */
283static int 283static int
284smb_init(int smb_command, int wct, struct cifsTconInfo *tcon, 284__smb_init(int smb_command, int wct, struct cifsTconInfo *tcon,
285 void **request_buf /* returned */ , 285 void **request_buf, void **response_buf)
286 void **response_buf /* returned */ )
287{ 286{
288 int rc = 0;
289
290 rc = cifs_reconnect_tcon(tcon, smb_command);
291 if (rc)
292 return rc;
293
294 *request_buf = cifs_buf_get(); 287 *request_buf = cifs_buf_get();
295 if (*request_buf == NULL) { 288 if (*request_buf == NULL) {
296 /* BB should we add a retry in here if not a writepage? */ 289 /* BB should we add a retry in here if not a writepage? */
@@ -309,7 +302,31 @@ smb_init(int smb_command, int wct, struct cifsTconInfo *tcon,
309 if (tcon != NULL) 302 if (tcon != NULL)
310 cifs_stats_inc(&tcon->num_smbs_sent); 303 cifs_stats_inc(&tcon->num_smbs_sent);
311 304
312 return rc; 305 return 0;
306}
307
308/* If the return code is zero, this function must fill in request_buf pointer */
309static int
310smb_init(int smb_command, int wct, struct cifsTconInfo *tcon,
311 void **request_buf, void **response_buf)
312{
313 int rc;
314
315 rc = cifs_reconnect_tcon(tcon, smb_command);
316 if (rc)
317 return rc;
318
319 return __smb_init(smb_command, wct, tcon, request_buf, response_buf);
320}
321
322static int
323smb_init_no_reconnect(int smb_command, int wct, struct cifsTconInfo *tcon,
324 void **request_buf, void **response_buf)
325{
326 if (tcon->ses->need_reconnect || tcon->need_reconnect)
327 return -EHOSTDOWN;
328
329 return __smb_init(smb_command, wct, tcon, request_buf, response_buf);
313} 330}
314 331
315static int validate_t2(struct smb_t2_rsp *pSMB) 332static int validate_t2(struct smb_t2_rsp *pSMB)
@@ -4534,8 +4551,8 @@ CIFSSMBQFSUnixInfo(const int xid, struct cifsTconInfo *tcon)
4534 4551
4535 cFYI(1, "In QFSUnixInfo"); 4552 cFYI(1, "In QFSUnixInfo");
4536QFSUnixRetry: 4553QFSUnixRetry:
4537 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, 4554 rc = smb_init_no_reconnect(SMB_COM_TRANSACTION2, 15, tcon,
4538 (void **) &pSMBr); 4555 (void **) &pSMB, (void **) &pSMBr);
4539 if (rc) 4556 if (rc)
4540 return rc; 4557 return rc;
4541 4558
@@ -4604,8 +4621,8 @@ CIFSSMBSetFSUnixInfo(const int xid, struct cifsTconInfo *tcon, __u64 cap)
4604 cFYI(1, "In SETFSUnixInfo"); 4621 cFYI(1, "In SETFSUnixInfo");
4605SETFSUnixRetry: 4622SETFSUnixRetry:
4606 /* BB switch to small buf init to save memory */ 4623 /* BB switch to small buf init to save memory */
4607 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, 4624 rc = smb_init_no_reconnect(SMB_COM_TRANSACTION2, 15, tcon,
4608 (void **) &pSMBr); 4625 (void **) &pSMB, (void **) &pSMBr);
4609 if (rc) 4626 if (rc)
4610 return rc; 4627 return rc;
4611 4628
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 93f77d438d3c..53cce8cc2224 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -801,6 +801,8 @@ retry_iget5_locked:
801 inode->i_flags |= S_NOATIME | S_NOCMTIME; 801 inode->i_flags |= S_NOATIME | S_NOCMTIME;
802 if (inode->i_state & I_NEW) { 802 if (inode->i_state & I_NEW) {
803 inode->i_ino = hash; 803 inode->i_ino = hash;
804 if (S_ISREG(inode->i_mode))
805 inode->i_data.backing_dev_info = sb->s_bdi;
804#ifdef CONFIG_CIFS_FSCACHE 806#ifdef CONFIG_CIFS_FSCACHE
805 /* initialize per-inode cache cookie pointer */ 807 /* initialize per-inode cache cookie pointer */
806 CIFS_I(inode)->fscache = NULL; 808 CIFS_I(inode)->fscache = NULL;
diff --git a/fs/compat.c b/fs/compat.c
index 718c7062aec1..0644a154672b 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1153,7 +1153,7 @@ static ssize_t compat_do_readv_writev(int type, struct file *file,
1153{ 1153{
1154 compat_ssize_t tot_len; 1154 compat_ssize_t tot_len;
1155 struct iovec iovstack[UIO_FASTIOV]; 1155 struct iovec iovstack[UIO_FASTIOV];
1156 struct iovec *iov; 1156 struct iovec *iov = iovstack;
1157 ssize_t ret; 1157 ssize_t ret;
1158 io_fn_t fn; 1158 io_fn_t fn;
1159 iov_fn_t fnv; 1159 iov_fn_t fnv;
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index eb7368ebd8cd..3eadd97324b1 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -54,6 +54,9 @@ struct page_collect {
54 unsigned nr_pages; 54 unsigned nr_pages;
55 unsigned long length; 55 unsigned long length;
56 loff_t pg_first; /* keep 64bit also in 32-arches */ 56 loff_t pg_first; /* keep 64bit also in 32-arches */
57 bool read_4_write; /* This means two things: that the read is sync
58 * And the pages should not be unlocked.
59 */
57}; 60};
58 61
59static void _pcol_init(struct page_collect *pcol, unsigned expected_pages, 62static void _pcol_init(struct page_collect *pcol, unsigned expected_pages,
@@ -71,6 +74,7 @@ static void _pcol_init(struct page_collect *pcol, unsigned expected_pages,
71 pcol->nr_pages = 0; 74 pcol->nr_pages = 0;
72 pcol->length = 0; 75 pcol->length = 0;
73 pcol->pg_first = -1; 76 pcol->pg_first = -1;
77 pcol->read_4_write = false;
74} 78}
75 79
76static void _pcol_reset(struct page_collect *pcol) 80static void _pcol_reset(struct page_collect *pcol)
@@ -347,7 +351,8 @@ static int readpage_strip(void *data, struct page *page)
347 if (PageError(page)) 351 if (PageError(page))
348 ClearPageError(page); 352 ClearPageError(page);
349 353
350 unlock_page(page); 354 if (!pcol->read_4_write)
355 unlock_page(page);
351 EXOFS_DBGMSG("readpage_strip(0x%lx, 0x%lx) empty page," 356 EXOFS_DBGMSG("readpage_strip(0x%lx, 0x%lx) empty page,"
352 " splitting\n", inode->i_ino, page->index); 357 " splitting\n", inode->i_ino, page->index);
353 358
@@ -428,6 +433,7 @@ static int _readpage(struct page *page, bool is_sync)
428 /* readpage_strip might call read_exec(,is_sync==false) at several 433 /* readpage_strip might call read_exec(,is_sync==false) at several
429 * places but not if we have a single page. 434 * places but not if we have a single page.
430 */ 435 */
436 pcol.read_4_write = is_sync;
431 ret = readpage_strip(&pcol, page); 437 ret = readpage_strip(&pcol, page);
432 if (ret) { 438 if (ret) {
433 EXOFS_ERR("_readpage => %d\n", ret); 439 EXOFS_ERR("_readpage => %d\n", ret);
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 81e086d8aa57..ab38fef1c9a1 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -52,8 +52,6 @@ struct wb_writeback_work {
52#define CREATE_TRACE_POINTS 52#define CREATE_TRACE_POINTS
53#include <trace/events/writeback.h> 53#include <trace/events/writeback.h>
54 54
55#define inode_to_bdi(inode) ((inode)->i_mapping->backing_dev_info)
56
57/* 55/*
58 * We don't actually have pdflush, but this one is exported though /proc... 56 * We don't actually have pdflush, but this one is exported though /proc...
59 */ 57 */
@@ -71,6 +69,16 @@ int writeback_in_progress(struct backing_dev_info *bdi)
71 return test_bit(BDI_writeback_running, &bdi->state); 69 return test_bit(BDI_writeback_running, &bdi->state);
72} 70}
73 71
72static inline struct backing_dev_info *inode_to_bdi(struct inode *inode)
73{
74 struct super_block *sb = inode->i_sb;
75
76 if (strcmp(sb->s_type->name, "bdev") == 0)
77 return inode->i_mapping->backing_dev_info;
78
79 return sb->s_bdi;
80}
81
74static void bdi_queue_work(struct backing_dev_info *bdi, 82static void bdi_queue_work(struct backing_dev_info *bdi,
75 struct wb_writeback_work *work) 83 struct wb_writeback_work *work)
76{ 84{
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index d367af1514ef..cde755cca564 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -1354,7 +1354,7 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
1354 loff_t file_size; 1354 loff_t file_size;
1355 unsigned int num; 1355 unsigned int num;
1356 unsigned int offset; 1356 unsigned int offset;
1357 size_t total_len; 1357 size_t total_len = 0;
1358 1358
1359 req = fuse_get_req(fc); 1359 req = fuse_get_req(fc);
1360 if (IS_ERR(req)) 1360 if (IS_ERR(req))
diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h
index cdfb8c6a4206..c16f8d8331b5 100644
--- a/fs/nfsd/nfsfh.h
+++ b/fs/nfsd/nfsfh.h
@@ -196,8 +196,6 @@ fh_lock(struct svc_fh *fhp)
196static inline void 196static inline void
197fh_unlock(struct svc_fh *fhp) 197fh_unlock(struct svc_fh *fhp)
198{ 198{
199 BUG_ON(!fhp->fh_dentry);
200
201 if (fhp->fh_locked) { 199 if (fhp->fh_locked) {
202 fill_post_wcc(fhp); 200 fill_post_wcc(fhp);
203 mutex_unlock(&fhp->fh_dentry->d_inode->i_mutex); 201 mutex_unlock(&fhp->fh_dentry->d_inode->i_mutex);
diff --git a/fs/notify/Kconfig b/fs/notify/Kconfig
index 22c629eedd82..b388443c3a09 100644
--- a/fs/notify/Kconfig
+++ b/fs/notify/Kconfig
@@ -3,4 +3,4 @@ config FSNOTIFY
3 3
4source "fs/notify/dnotify/Kconfig" 4source "fs/notify/dnotify/Kconfig"
5source "fs/notify/inotify/Kconfig" 5source "fs/notify/inotify/Kconfig"
6source "fs/notify/fanotify/Kconfig" 6#source "fs/notify/fanotify/Kconfig"
diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c
index a76e0aa5cd3f..391915093fe1 100644
--- a/fs/ocfs2/acl.c
+++ b/fs/ocfs2/acl.c
@@ -209,7 +209,10 @@ static int ocfs2_acl_set_mode(struct inode *inode, struct buffer_head *di_bh,
209 } 209 }
210 210
211 inode->i_mode = new_mode; 211 inode->i_mode = new_mode;
212 inode->i_ctime = CURRENT_TIME;
212 di->i_mode = cpu_to_le16(inode->i_mode); 213 di->i_mode = cpu_to_le16(inode->i_mode);
214 di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
215 di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
213 216
214 ocfs2_journal_dirty(handle, di_bh); 217 ocfs2_journal_dirty(handle, di_bh);
215 218
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index 1361997cf205..cbe2f057cc28 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -977,7 +977,7 @@ static int o2net_tx_can_proceed(struct o2net_node *nn,
977int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *caller_vec, 977int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *caller_vec,
978 size_t caller_veclen, u8 target_node, int *status) 978 size_t caller_veclen, u8 target_node, int *status)
979{ 979{
980 int ret; 980 int ret = 0;
981 struct o2net_msg *msg = NULL; 981 struct o2net_msg *msg = NULL;
982 size_t veclen, caller_bytes = 0; 982 size_t veclen, caller_bytes = 0;
983 struct kvec *vec = NULL; 983 struct kvec *vec = NULL;
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index f04ebcfffc4a..c49f6de0e7ab 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -3931,6 +3931,15 @@ static int ocfs2_dx_dir_rebalance(struct ocfs2_super *osb, struct inode *dir,
3931 goto out_commit; 3931 goto out_commit;
3932 } 3932 }
3933 3933
3934 cpos = split_hash;
3935 ret = ocfs2_dx_dir_new_cluster(dir, &et, cpos, handle,
3936 data_ac, meta_ac, new_dx_leaves,
3937 num_dx_leaves);
3938 if (ret) {
3939 mlog_errno(ret);
3940 goto out_commit;
3941 }
3942
3934 for (i = 0; i < num_dx_leaves; i++) { 3943 for (i = 0; i < num_dx_leaves; i++) {
3935 ret = ocfs2_journal_access_dl(handle, INODE_CACHE(dir), 3944 ret = ocfs2_journal_access_dl(handle, INODE_CACHE(dir),
3936 orig_dx_leaves[i], 3945 orig_dx_leaves[i],
@@ -3939,15 +3948,14 @@ static int ocfs2_dx_dir_rebalance(struct ocfs2_super *osb, struct inode *dir,
3939 mlog_errno(ret); 3948 mlog_errno(ret);
3940 goto out_commit; 3949 goto out_commit;
3941 } 3950 }
3942 }
3943 3951
3944 cpos = split_hash; 3952 ret = ocfs2_journal_access_dl(handle, INODE_CACHE(dir),
3945 ret = ocfs2_dx_dir_new_cluster(dir, &et, cpos, handle, 3953 new_dx_leaves[i],
3946 data_ac, meta_ac, new_dx_leaves, 3954 OCFS2_JOURNAL_ACCESS_WRITE);
3947 num_dx_leaves); 3955 if (ret) {
3948 if (ret) { 3956 mlog_errno(ret);
3949 mlog_errno(ret); 3957 goto out_commit;
3950 goto out_commit; 3958 }
3951 } 3959 }
3952 3960
3953 ocfs2_dx_dir_transfer_leaf(dir, split_hash, handle, tmp_dx_leaf, 3961 ocfs2_dx_dir_transfer_leaf(dir, split_hash, handle, tmp_dx_leaf,
diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h
index 4b6ae2c13b47..765298908f1d 100644
--- a/fs/ocfs2/dlm/dlmcommon.h
+++ b/fs/ocfs2/dlm/dlmcommon.h
@@ -1030,6 +1030,7 @@ int dlm_drop_lockres_ref(struct dlm_ctxt *dlm,
1030 struct dlm_lock_resource *res); 1030 struct dlm_lock_resource *res);
1031void dlm_clean_master_list(struct dlm_ctxt *dlm, 1031void dlm_clean_master_list(struct dlm_ctxt *dlm,
1032 u8 dead_node); 1032 u8 dead_node);
1033void dlm_force_free_mles(struct dlm_ctxt *dlm);
1033int dlm_lock_basts_flushed(struct dlm_ctxt *dlm, struct dlm_lock *lock); 1034int dlm_lock_basts_flushed(struct dlm_ctxt *dlm, struct dlm_lock *lock);
1034int __dlm_lockres_has_locks(struct dlm_lock_resource *res); 1035int __dlm_lockres_has_locks(struct dlm_lock_resource *res);
1035int __dlm_lockres_unused(struct dlm_lock_resource *res); 1036int __dlm_lockres_unused(struct dlm_lock_resource *res);
diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c
index 5efdd37dfe48..901ca52bf86b 100644
--- a/fs/ocfs2/dlm/dlmdebug.c
+++ b/fs/ocfs2/dlm/dlmdebug.c
@@ -636,8 +636,14 @@ static void *lockres_seq_start(struct seq_file *m, loff_t *pos)
636 spin_lock(&dlm->track_lock); 636 spin_lock(&dlm->track_lock);
637 if (oldres) 637 if (oldres)
638 track_list = &oldres->tracking; 638 track_list = &oldres->tracking;
639 else 639 else {
640 track_list = &dlm->tracking_list; 640 track_list = &dlm->tracking_list;
641 if (list_empty(track_list)) {
642 dl = NULL;
643 spin_unlock(&dlm->track_lock);
644 goto bail;
645 }
646 }
641 647
642 list_for_each_entry(res, track_list, tracking) { 648 list_for_each_entry(res, track_list, tracking) {
643 if (&res->tracking == &dlm->tracking_list) 649 if (&res->tracking == &dlm->tracking_list)
@@ -660,6 +666,7 @@ static void *lockres_seq_start(struct seq_file *m, loff_t *pos)
660 } else 666 } else
661 dl = NULL; 667 dl = NULL;
662 668
669bail:
663 /* passed to seq_show */ 670 /* passed to seq_show */
664 return dl; 671 return dl;
665} 672}
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 153abb5abef0..11a5c87fd7f7 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -693,6 +693,7 @@ void dlm_unregister_domain(struct dlm_ctxt *dlm)
693 693
694 dlm_mark_domain_leaving(dlm); 694 dlm_mark_domain_leaving(dlm);
695 dlm_leave_domain(dlm); 695 dlm_leave_domain(dlm);
696 dlm_force_free_mles(dlm);
696 dlm_complete_dlm_shutdown(dlm); 697 dlm_complete_dlm_shutdown(dlm);
697 } 698 }
698 dlm_put(dlm); 699 dlm_put(dlm);
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index ffb4c68dafa4..f564b0e5f80d 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -3433,3 +3433,43 @@ void dlm_lockres_release_ast(struct dlm_ctxt *dlm,
3433 wake_up(&res->wq); 3433 wake_up(&res->wq);
3434 wake_up(&dlm->migration_wq); 3434 wake_up(&dlm->migration_wq);
3435} 3435}
3436
3437void dlm_force_free_mles(struct dlm_ctxt *dlm)
3438{
3439 int i;
3440 struct hlist_head *bucket;
3441 struct dlm_master_list_entry *mle;
3442 struct hlist_node *tmp, *list;
3443
3444 /*
3445 * We notified all other nodes that we are exiting the domain and
3446 * marked the dlm state to DLM_CTXT_LEAVING. If any mles are still
3447 * around we force free them and wake any processes that are waiting
3448 * on the mles
3449 */
3450 spin_lock(&dlm->spinlock);
3451 spin_lock(&dlm->master_lock);
3452
3453 BUG_ON(dlm->dlm_state != DLM_CTXT_LEAVING);
3454 BUG_ON((find_next_bit(dlm->domain_map, O2NM_MAX_NODES, 0) < O2NM_MAX_NODES));
3455
3456 for (i = 0; i < DLM_HASH_BUCKETS; i++) {
3457 bucket = dlm_master_hash(dlm, i);
3458 hlist_for_each_safe(list, tmp, bucket) {
3459 mle = hlist_entry(list, struct dlm_master_list_entry,
3460 master_hash_node);
3461 if (mle->type != DLM_MLE_BLOCK) {
3462 mlog(ML_ERROR, "bad mle: %p\n", mle);
3463 dlm_print_one_mle(mle);
3464 }
3465 atomic_set(&mle->woken, 1);
3466 wake_up(&mle->wq);
3467
3468 __dlm_unlink_mle(dlm, mle);
3469 __dlm_mle_detach_hb_events(dlm, mle);
3470 __dlm_put_mle(mle);
3471 }
3472 }
3473 spin_unlock(&dlm->master_lock);
3474 spin_unlock(&dlm->spinlock);
3475}
diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h
index d1ce48e1b3d6..1d596d8c4a4a 100644
--- a/fs/ocfs2/dlmglue.h
+++ b/fs/ocfs2/dlmglue.h
@@ -84,6 +84,7 @@ enum {
84 OI_LS_PARENT, 84 OI_LS_PARENT,
85 OI_LS_RENAME1, 85 OI_LS_RENAME1,
86 OI_LS_RENAME2, 86 OI_LS_RENAME2,
87 OI_LS_REFLINK_TARGET,
87}; 88};
88 89
89int ocfs2_dlm_init(struct ocfs2_super *osb); 90int ocfs2_dlm_init(struct ocfs2_super *osb);
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index 33f1c9a8258d..fa31d05e41b7 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -235,18 +235,31 @@
235#define OCFS2_HAS_REFCOUNT_FL (0x0010) 235#define OCFS2_HAS_REFCOUNT_FL (0x0010)
236 236
237/* Inode attributes, keep in sync with EXT2 */ 237/* Inode attributes, keep in sync with EXT2 */
238#define OCFS2_SECRM_FL (0x00000001) /* Secure deletion */ 238#define OCFS2_SECRM_FL FS_SECRM_FL /* Secure deletion */
239#define OCFS2_UNRM_FL (0x00000002) /* Undelete */ 239#define OCFS2_UNRM_FL FS_UNRM_FL /* Undelete */
240#define OCFS2_COMPR_FL (0x00000004) /* Compress file */ 240#define OCFS2_COMPR_FL FS_COMPR_FL /* Compress file */
241#define OCFS2_SYNC_FL (0x00000008) /* Synchronous updates */ 241#define OCFS2_SYNC_FL FS_SYNC_FL /* Synchronous updates */
242#define OCFS2_IMMUTABLE_FL (0x00000010) /* Immutable file */ 242#define OCFS2_IMMUTABLE_FL FS_IMMUTABLE_FL /* Immutable file */
243#define OCFS2_APPEND_FL (0x00000020) /* writes to file may only append */ 243#define OCFS2_APPEND_FL FS_APPEND_FL /* writes to file may only append */
244#define OCFS2_NODUMP_FL (0x00000040) /* do not dump file */ 244#define OCFS2_NODUMP_FL FS_NODUMP_FL /* do not dump file */
245#define OCFS2_NOATIME_FL (0x00000080) /* do not update atime */ 245#define OCFS2_NOATIME_FL FS_NOATIME_FL /* do not update atime */
246#define OCFS2_DIRSYNC_FL (0x00010000) /* dirsync behaviour (directories only) */ 246/* Reserved for compression usage... */
247 247#define OCFS2_DIRTY_FL FS_DIRTY_FL
248#define OCFS2_FL_VISIBLE (0x000100FF) /* User visible flags */ 248#define OCFS2_COMPRBLK_FL FS_COMPRBLK_FL /* One or more compressed clusters */
249#define OCFS2_FL_MODIFIABLE (0x000100FF) /* User modifiable flags */ 249#define OCFS2_NOCOMP_FL FS_NOCOMP_FL /* Don't compress */
250#define OCFS2_ECOMPR_FL FS_ECOMPR_FL /* Compression error */
251/* End compression flags --- maybe not all used */
252#define OCFS2_BTREE_FL FS_BTREE_FL /* btree format dir */
253#define OCFS2_INDEX_FL FS_INDEX_FL /* hash-indexed directory */
254#define OCFS2_IMAGIC_FL FS_IMAGIC_FL /* AFS directory */
255#define OCFS2_JOURNAL_DATA_FL FS_JOURNAL_DATA_FL /* Reserved for ext3 */
256#define OCFS2_NOTAIL_FL FS_NOTAIL_FL /* file tail should not be merged */
257#define OCFS2_DIRSYNC_FL FS_DIRSYNC_FL /* dirsync behaviour (directories only) */
258#define OCFS2_TOPDIR_FL FS_TOPDIR_FL /* Top of directory hierarchies*/
259#define OCFS2_RESERVED_FL FS_RESERVED_FL /* reserved for ext2 lib */
260
261#define OCFS2_FL_VISIBLE FS_FL_USER_VISIBLE /* User visible flags */
262#define OCFS2_FL_MODIFIABLE FS_FL_USER_MODIFIABLE /* User modifiable flags */
250 263
251/* 264/*
252 * Extent record flags (e_node.leaf.flags) 265 * Extent record flags (e_node.leaf.flags)
diff --git a/fs/ocfs2/ocfs2_ioctl.h b/fs/ocfs2/ocfs2_ioctl.h
index 2d3420af1a83..5d241505690b 100644
--- a/fs/ocfs2/ocfs2_ioctl.h
+++ b/fs/ocfs2/ocfs2_ioctl.h
@@ -23,10 +23,10 @@
23/* 23/*
24 * ioctl commands 24 * ioctl commands
25 */ 25 */
26#define OCFS2_IOC_GETFLAGS _IOR('f', 1, long) 26#define OCFS2_IOC_GETFLAGS FS_IOC_GETFLAGS
27#define OCFS2_IOC_SETFLAGS _IOW('f', 2, long) 27#define OCFS2_IOC_SETFLAGS FS_IOC_SETFLAGS
28#define OCFS2_IOC32_GETFLAGS _IOR('f', 1, int) 28#define OCFS2_IOC32_GETFLAGS FS_IOC32_GETFLAGS
29#define OCFS2_IOC32_SETFLAGS _IOW('f', 2, int) 29#define OCFS2_IOC32_SETFLAGS FS_IOC32_SETFLAGS
30 30
31/* 31/*
32 * Space reservation / allocation / free ioctls and argument structure 32 * Space reservation / allocation / free ioctls and argument structure
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 0afeda83120f..efdd75607406 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -4201,8 +4201,9 @@ static int __ocfs2_reflink(struct dentry *old_dentry,
4201 goto out; 4201 goto out;
4202 } 4202 }
4203 4203
4204 mutex_lock(&new_inode->i_mutex); 4204 mutex_lock_nested(&new_inode->i_mutex, I_MUTEX_CHILD);
4205 ret = ocfs2_inode_lock(new_inode, &new_bh, 1); 4205 ret = ocfs2_inode_lock_nested(new_inode, &new_bh, 1,
4206 OI_LS_REFLINK_TARGET);
4206 if (ret) { 4207 if (ret) {
4207 mlog_errno(ret); 4208 mlog_errno(ret);
4208 goto out_unlock; 4209 goto out_unlock;
diff --git a/fs/ocfs2/reservations.c b/fs/ocfs2/reservations.c
index d8b6e4259b80..3e78db361bc7 100644
--- a/fs/ocfs2/reservations.c
+++ b/fs/ocfs2/reservations.c
@@ -732,25 +732,23 @@ int ocfs2_resmap_resv_bits(struct ocfs2_reservation_map *resmap,
732 struct ocfs2_alloc_reservation *resv, 732 struct ocfs2_alloc_reservation *resv,
733 int *cstart, int *clen) 733 int *cstart, int *clen)
734{ 734{
735 unsigned int wanted = *clen;
736
737 if (resv == NULL || ocfs2_resmap_disabled(resmap)) 735 if (resv == NULL || ocfs2_resmap_disabled(resmap))
738 return -ENOSPC; 736 return -ENOSPC;
739 737
740 spin_lock(&resv_lock); 738 spin_lock(&resv_lock);
741 739
742 /*
743 * We don't want to over-allocate for temporary
744 * windows. Otherwise, we run the risk of fragmenting the
745 * allocation space.
746 */
747 wanted = ocfs2_resv_window_bits(resmap, resv);
748 if ((resv->r_flags & OCFS2_RESV_FLAG_TMP) || wanted < *clen)
749 wanted = *clen;
750
751 if (ocfs2_resv_empty(resv)) { 740 if (ocfs2_resv_empty(resv)) {
752 mlog(0, "empty reservation, find new window\n"); 741 /*
742 * We don't want to over-allocate for temporary
743 * windows. Otherwise, we run the risk of fragmenting the
744 * allocation space.
745 */
746 unsigned int wanted = ocfs2_resv_window_bits(resmap, resv);
753 747
748 if ((resv->r_flags & OCFS2_RESV_FLAG_TMP) || wanted < *clen)
749 wanted = *clen;
750
751 mlog(0, "empty reservation, find new window\n");
754 /* 752 /*
755 * Try to get a window here. If it works, we must fall 753 * Try to get a window here. If it works, we must fall
756 * through and test the bitmap . This avoids some 754 * through and test the bitmap . This avoids some
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index 8a286f54dca1..849c2f0e0a0e 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -357,7 +357,7 @@ out:
357static void ocfs2_bg_discontig_add_extent(struct ocfs2_super *osb, 357static void ocfs2_bg_discontig_add_extent(struct ocfs2_super *osb,
358 struct ocfs2_group_desc *bg, 358 struct ocfs2_group_desc *bg,
359 struct ocfs2_chain_list *cl, 359 struct ocfs2_chain_list *cl,
360 u64 p_blkno, u32 clusters) 360 u64 p_blkno, unsigned int clusters)
361{ 361{
362 struct ocfs2_extent_list *el = &bg->bg_list; 362 struct ocfs2_extent_list *el = &bg->bg_list;
363 struct ocfs2_extent_rec *rec; 363 struct ocfs2_extent_rec *rec;
@@ -369,7 +369,7 @@ static void ocfs2_bg_discontig_add_extent(struct ocfs2_super *osb,
369 rec->e_blkno = cpu_to_le64(p_blkno); 369 rec->e_blkno = cpu_to_le64(p_blkno);
370 rec->e_cpos = cpu_to_le32(le16_to_cpu(bg->bg_bits) / 370 rec->e_cpos = cpu_to_le32(le16_to_cpu(bg->bg_bits) /
371 le16_to_cpu(cl->cl_bpc)); 371 le16_to_cpu(cl->cl_bpc));
372 rec->e_leaf_clusters = cpu_to_le32(clusters); 372 rec->e_leaf_clusters = cpu_to_le16(clusters);
373 le16_add_cpu(&bg->bg_bits, clusters * le16_to_cpu(cl->cl_bpc)); 373 le16_add_cpu(&bg->bg_bits, clusters * le16_to_cpu(cl->cl_bpc));
374 le16_add_cpu(&bg->bg_free_bits_count, 374 le16_add_cpu(&bg->bg_free_bits_count,
375 clusters * le16_to_cpu(cl->cl_bpc)); 375 clusters * le16_to_cpu(cl->cl_bpc));
diff --git a/fs/ocfs2/symlink.c b/fs/ocfs2/symlink.c
index 32499d213fc4..9975457c981f 100644
--- a/fs/ocfs2/symlink.c
+++ b/fs/ocfs2/symlink.c
@@ -128,7 +128,7 @@ static void *ocfs2_fast_follow_link(struct dentry *dentry,
128 } 128 }
129 129
130 /* Fast symlinks can't be large */ 130 /* Fast symlinks can't be large */
131 len = strlen(target); 131 len = strnlen(target, ocfs2_fast_symlink_chars(inode->i_sb));
132 link = kzalloc(len + 1, GFP_NOFS); 132 link = kzalloc(len + 1, GFP_NOFS);
133 if (!link) { 133 if (!link) {
134 status = -ENOMEM; 134 status = -ENOMEM;
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index d03469f61801..06fa5e77c40e 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -1286,13 +1286,11 @@ int ocfs2_xattr_get_nolock(struct inode *inode,
1286 xis.inode_bh = xbs.inode_bh = di_bh; 1286 xis.inode_bh = xbs.inode_bh = di_bh;
1287 di = (struct ocfs2_dinode *)di_bh->b_data; 1287 di = (struct ocfs2_dinode *)di_bh->b_data;
1288 1288
1289 down_read(&oi->ip_xattr_sem);
1290 ret = ocfs2_xattr_ibody_get(inode, name_index, name, buffer, 1289 ret = ocfs2_xattr_ibody_get(inode, name_index, name, buffer,
1291 buffer_size, &xis); 1290 buffer_size, &xis);
1292 if (ret == -ENODATA && di->i_xattr_loc) 1291 if (ret == -ENODATA && di->i_xattr_loc)
1293 ret = ocfs2_xattr_block_get(inode, name_index, name, buffer, 1292 ret = ocfs2_xattr_block_get(inode, name_index, name, buffer,
1294 buffer_size, &xbs); 1293 buffer_size, &xbs);
1295 up_read(&oi->ip_xattr_sem);
1296 1294
1297 return ret; 1295 return ret;
1298} 1296}
@@ -1316,8 +1314,10 @@ static int ocfs2_xattr_get(struct inode *inode,
1316 mlog_errno(ret); 1314 mlog_errno(ret);
1317 return ret; 1315 return ret;
1318 } 1316 }
1317 down_read(&OCFS2_I(inode)->ip_xattr_sem);
1319 ret = ocfs2_xattr_get_nolock(inode, di_bh, name_index, 1318 ret = ocfs2_xattr_get_nolock(inode, di_bh, name_index,
1320 name, buffer, buffer_size); 1319 name, buffer, buffer_size);
1320 up_read(&OCFS2_I(inode)->ip_xattr_sem);
1321 1321
1322 ocfs2_inode_unlock(inode, 0); 1322 ocfs2_inode_unlock(inode, 0);
1323 1323
diff --git a/fs/proc/base.c b/fs/proc/base.c
index a1c43e7c8a7b..8e4addaa5424 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2675,7 +2675,7 @@ static const struct pid_entry tgid_base_stuff[] = {
2675 INF("auxv", S_IRUSR, proc_pid_auxv), 2675 INF("auxv", S_IRUSR, proc_pid_auxv),
2676 ONE("status", S_IRUGO, proc_pid_status), 2676 ONE("status", S_IRUGO, proc_pid_status),
2677 ONE("personality", S_IRUSR, proc_pid_personality), 2677 ONE("personality", S_IRUSR, proc_pid_personality),
2678 INF("limits", S_IRUSR, proc_pid_limits), 2678 INF("limits", S_IRUGO, proc_pid_limits),
2679#ifdef CONFIG_SCHED_DEBUG 2679#ifdef CONFIG_SCHED_DEBUG
2680 REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), 2680 REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations),
2681#endif 2681#endif
@@ -3011,7 +3011,7 @@ static const struct pid_entry tid_base_stuff[] = {
3011 INF("auxv", S_IRUSR, proc_pid_auxv), 3011 INF("auxv", S_IRUSR, proc_pid_auxv),
3012 ONE("status", S_IRUGO, proc_pid_status), 3012 ONE("status", S_IRUGO, proc_pid_status),
3013 ONE("personality", S_IRUSR, proc_pid_personality), 3013 ONE("personality", S_IRUSR, proc_pid_personality),
3014 INF("limits", S_IRUSR, proc_pid_limits), 3014 INF("limits", S_IRUGO, proc_pid_limits),
3015#ifdef CONFIG_SCHED_DEBUG 3015#ifdef CONFIG_SCHED_DEBUG
3016 REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), 3016 REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations),
3017#endif 3017#endif
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 271afc48b9a5..1dbca4e8cc16 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -363,13 +363,13 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
363 mss->referenced += PAGE_SIZE; 363 mss->referenced += PAGE_SIZE;
364 mapcount = page_mapcount(page); 364 mapcount = page_mapcount(page);
365 if (mapcount >= 2) { 365 if (mapcount >= 2) {
366 if (pte_dirty(ptent)) 366 if (pte_dirty(ptent) || PageDirty(page))
367 mss->shared_dirty += PAGE_SIZE; 367 mss->shared_dirty += PAGE_SIZE;
368 else 368 else
369 mss->shared_clean += PAGE_SIZE; 369 mss->shared_clean += PAGE_SIZE;
370 mss->pss += (PAGE_SIZE << PSS_SHIFT) / mapcount; 370 mss->pss += (PAGE_SIZE << PSS_SHIFT) / mapcount;
371 } else { 371 } else {
372 if (pte_dirty(ptent)) 372 if (pte_dirty(ptent) || PageDirty(page))
373 mss->private_dirty += PAGE_SIZE; 373 mss->private_dirty += PAGE_SIZE;
374 else 374 else
375 mss->private_clean += PAGE_SIZE; 375 mss->private_clean += PAGE_SIZE;
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index 91c817ff02c3..2367fb3f70bc 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -163,7 +163,7 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer,
163 163
164static const struct file_operations proc_vmcore_operations = { 164static const struct file_operations proc_vmcore_operations = {
165 .read = read_vmcore, 165 .read = read_vmcore,
166 .llseek = generic_file_llseek, 166 .llseek = default_llseek,
167}; 167};
168 168
169static struct vmcore* __init get_new_element(void) 169static struct vmcore* __init get_new_element(void)
diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c
index f53505de0712..5cbb81e134ac 100644
--- a/fs/reiserfs/ioctl.c
+++ b/fs/reiserfs/ioctl.c
@@ -170,6 +170,7 @@ int reiserfs_prepare_write(struct file *f, struct page *page,
170int reiserfs_unpack(struct inode *inode, struct file *filp) 170int reiserfs_unpack(struct inode *inode, struct file *filp)
171{ 171{
172 int retval = 0; 172 int retval = 0;
173 int depth;
173 int index; 174 int index;
174 struct page *page; 175 struct page *page;
175 struct address_space *mapping; 176 struct address_space *mapping;
@@ -188,8 +189,8 @@ int reiserfs_unpack(struct inode *inode, struct file *filp)
188 /* we need to make sure nobody is changing the file size beneath 189 /* we need to make sure nobody is changing the file size beneath
189 ** us 190 ** us
190 */ 191 */
191 mutex_lock(&inode->i_mutex); 192 reiserfs_mutex_lock_safe(&inode->i_mutex, inode->i_sb);
192 reiserfs_write_lock(inode->i_sb); 193 depth = reiserfs_write_lock_once(inode->i_sb);
193 194
194 write_from = inode->i_size & (blocksize - 1); 195 write_from = inode->i_size & (blocksize - 1);
195 /* if we are on a block boundary, we are already unpacked. */ 196 /* if we are on a block boundary, we are already unpacked. */
@@ -224,6 +225,6 @@ int reiserfs_unpack(struct inode *inode, struct file *filp)
224 225
225 out: 226 out:
226 mutex_unlock(&inode->i_mutex); 227 mutex_unlock(&inode->i_mutex);
227 reiserfs_write_unlock(inode->i_sb); 228 reiserfs_write_unlock_once(inode->i_sb, depth);
228 return retval; 229 return retval;
229} 230}
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index d59c4a65d492..81976ffed7d6 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -668,14 +668,11 @@ xfs_inode_set_reclaim_tag(
668 xfs_perag_put(pag); 668 xfs_perag_put(pag);
669} 669}
670 670
671void 671STATIC void
672__xfs_inode_clear_reclaim_tag( 672__xfs_inode_clear_reclaim(
673 xfs_mount_t *mp,
674 xfs_perag_t *pag, 673 xfs_perag_t *pag,
675 xfs_inode_t *ip) 674 xfs_inode_t *ip)
676{ 675{
677 radix_tree_tag_clear(&pag->pag_ici_root,
678 XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
679 pag->pag_ici_reclaimable--; 676 pag->pag_ici_reclaimable--;
680 if (!pag->pag_ici_reclaimable) { 677 if (!pag->pag_ici_reclaimable) {
681 /* clear the reclaim tag from the perag radix tree */ 678 /* clear the reclaim tag from the perag radix tree */
@@ -689,6 +686,17 @@ __xfs_inode_clear_reclaim_tag(
689 } 686 }
690} 687}
691 688
689void
690__xfs_inode_clear_reclaim_tag(
691 xfs_mount_t *mp,
692 xfs_perag_t *pag,
693 xfs_inode_t *ip)
694{
695 radix_tree_tag_clear(&pag->pag_ici_root,
696 XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
697 __xfs_inode_clear_reclaim(pag, ip);
698}
699
692/* 700/*
693 * Inodes in different states need to be treated differently, and the return 701 * Inodes in different states need to be treated differently, and the return
694 * value of xfs_iflush is not sufficient to get this right. The following table 702 * value of xfs_iflush is not sufficient to get this right. The following table
@@ -838,6 +846,7 @@ reclaim:
838 if (!radix_tree_delete(&pag->pag_ici_root, 846 if (!radix_tree_delete(&pag->pag_ici_root,
839 XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino))) 847 XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino)))
840 ASSERT(0); 848 ASSERT(0);
849 __xfs_inode_clear_reclaim(pag, ip);
841 write_unlock(&pag->pag_ici_lock); 850 write_unlock(&pag->pag_ici_lock);
842 851
843 /* 852 /*
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index ed575fb4b495..7e206fc1fa36 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -405,9 +405,15 @@ xlog_cil_push(
405 new_ctx = kmem_zalloc(sizeof(*new_ctx), KM_SLEEP|KM_NOFS); 405 new_ctx = kmem_zalloc(sizeof(*new_ctx), KM_SLEEP|KM_NOFS);
406 new_ctx->ticket = xlog_cil_ticket_alloc(log); 406 new_ctx->ticket = xlog_cil_ticket_alloc(log);
407 407
408 /* lock out transaction commit, but don't block on background push */ 408 /*
409 * Lock out transaction commit, but don't block for background pushes
410 * unless we are well over the CIL space limit. See the definition of
411 * XLOG_CIL_HARD_SPACE_LIMIT() for the full explanation of the logic
412 * used here.
413 */
409 if (!down_write_trylock(&cil->xc_ctx_lock)) { 414 if (!down_write_trylock(&cil->xc_ctx_lock)) {
410 if (!push_seq) 415 if (!push_seq &&
416 cil->xc_ctx->space_used < XLOG_CIL_HARD_SPACE_LIMIT(log))
411 goto out_free_ticket; 417 goto out_free_ticket;
412 down_write(&cil->xc_ctx_lock); 418 down_write(&cil->xc_ctx_lock);
413 } 419 }
@@ -422,7 +428,7 @@ xlog_cil_push(
422 goto out_skip; 428 goto out_skip;
423 429
424 /* check for a previously pushed seqeunce */ 430 /* check for a previously pushed seqeunce */
425 if (push_seq < cil->xc_ctx->sequence) 431 if (push_seq && push_seq < cil->xc_ctx->sequence)
426 goto out_skip; 432 goto out_skip;
427 433
428 /* 434 /*
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index ced52b98b322..edcdfe01617f 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -426,13 +426,13 @@ struct xfs_cil {
426}; 426};
427 427
428/* 428/*
429 * The amount of log space we should the CIL to aggregate is difficult to size. 429 * The amount of log space we allow the CIL to aggregate is difficult to size.
430 * Whatever we chose we have to make we can get a reservation for the log space 430 * Whatever we choose, we have to make sure we can get a reservation for the
431 * effectively, that it is large enough to capture sufficient relogging to 431 * log space effectively, that it is large enough to capture sufficient
432 * reduce log buffer IO significantly, but it is not too large for the log or 432 * relogging to reduce log buffer IO significantly, but it is not too large for
433 * induces too much latency when writing out through the iclogs. We track both 433 * the log or induces too much latency when writing out through the iclogs. We
434 * space consumed and the number of vectors in the checkpoint context, so we 434 * track both space consumed and the number of vectors in the checkpoint
435 * need to decide which to use for limiting. 435 * context, so we need to decide which to use for limiting.
436 * 436 *
437 * Every log buffer we write out during a push needs a header reserved, which 437 * Every log buffer we write out during a push needs a header reserved, which
438 * is at least one sector and more for v2 logs. Hence we need a reservation of 438 * is at least one sector and more for v2 logs. Hence we need a reservation of
@@ -459,16 +459,21 @@ struct xfs_cil {
459 * checkpoint transaction ticket is specific to the checkpoint context, rather 459 * checkpoint transaction ticket is specific to the checkpoint context, rather
460 * than the CIL itself. 460 * than the CIL itself.
461 * 461 *
462 * With dynamic reservations, we can basically make up arbitrary limits for the 462 * With dynamic reservations, we can effectively make up arbitrary limits for
463 * checkpoint size so long as they don't violate any other size rules. Hence 463 * the checkpoint size so long as they don't violate any other size rules.
464 * the initial maximum size for the checkpoint transaction will be set to a 464 * Recovery imposes a rule that no transaction exceed half the log, so we are
465 * quarter of the log or 8MB, which ever is smaller. 8MB is an arbitrary limit 465 * limited by that. Furthermore, the log transaction reservation subsystem
466 * right now based on the latency of writing out a large amount of data through 466 * tries to keep 25% of the log free, so we need to keep below that limit or we
467 * the circular iclog buffers. 467 * risk running out of free log space to start any new transactions.
468 *
469 * In order to keep background CIL push efficient, we will set a lower
470 * threshold at which background pushing is attempted without blocking current
471 * transaction commits. A separate, higher bound defines when CIL pushes are
472 * enforced to ensure we stay within our maximum checkpoint size bounds.
473 * threshold, yet give us plenty of space for aggregation on large logs.
468 */ 474 */
469 475#define XLOG_CIL_SPACE_LIMIT(log) (log->l_logsize >> 3)
470#define XLOG_CIL_SPACE_LIMIT(log) \ 476#define XLOG_CIL_HARD_SPACE_LIMIT(log) (3 * (log->l_logsize >> 4))
471 (min((log->l_logsize >> 2), (8 * 1024 * 1024)))
472 477
473/* 478/*
474 * The reservation head lsn is not made up of a cycle number and block number. 479 * The reservation head lsn is not made up of a cycle number and block number.