aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorJoel Becker <joel.becker@oracle.com>2010-10-15 16:03:09 -0400
committerJoel Becker <joel.becker@oracle.com>2010-10-15 16:03:09 -0400
commitfc3718918f13ad72827d62d36ea0f5fb55090644 (patch)
tree4f9551256e02d08be37bab137f3d94182a67504c /fs
parent7bdb0d18bfd381cc5491eb95973ec5604b356c7e (diff)
parentd4396eafe402b710a8535137b3bf2abe6c059a15 (diff)
Merge branch 'globalheartbeat-2' of git://oss.oracle.com/git/smushran/linux-2.6 into ocfs2-merge-window
Conflicts: fs/ocfs2/ocfs2.h
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/vfs_dir.c6
-rw-r--r--fs/9p/vfs_inode.c9
-rw-r--r--fs/9p/vfs_super.c20
-rw-r--r--fs/aio.c13
-rw-r--r--fs/binfmt_misc.c2
-rw-r--r--fs/bio-integrity.c4
-rw-r--r--fs/ceph/Kconfig1
-rw-r--r--fs/ceph/addr.c7
-rw-r--r--fs/ceph/caps.c27
-rw-r--r--fs/ceph/dir.c10
-rw-r--r--fs/ceph/inode.c11
-rw-r--r--fs/ceph/mds_client.c2
-rw-r--r--fs/ceph/pagelist.c12
-rw-r--r--fs/ceph/snap.c92
-rw-r--r--fs/ceph/super.h5
-rw-r--r--fs/char_dev.c4
-rw-r--r--fs/cifs/Kconfig2
-rw-r--r--fs/cifs/asn1.c6
-rw-r--r--fs/cifs/cifsencrypt.c418
-rw-r--r--fs/cifs/cifsglob.h25
-rw-r--r--fs/cifs/cifspdu.h7
-rw-r--r--fs/cifs/cifsproto.h13
-rw-r--r--fs/cifs/cifssmb.c62
-rw-r--r--fs/cifs/connect.c77
-rw-r--r--fs/cifs/inode.c32
-rw-r--r--fs/cifs/netmisc.c22
-rw-r--r--fs/cifs/ntlmssp.h13
-rw-r--r--fs/cifs/sess.c132
-rw-r--r--fs/cifs/transport.c6
-rw-r--r--fs/coda/psdev.c4
-rw-r--r--fs/compat.c2
-rw-r--r--fs/direct-io.c4
-rw-r--r--fs/exec.c14
-rw-r--r--fs/fcntl.c10
-rw-r--r--fs/fs-writeback.c14
-rw-r--r--fs/fuse/dev.c2
-rw-r--r--fs/gfs2/log.c2
-rw-r--r--fs/minix/namei.c2
-rw-r--r--fs/nfs/Kconfig1
-rw-r--r--fs/nfs/client.c2
-rw-r--r--fs/nfs/file.c4
-rw-r--r--fs/nfs/super.c8
-rw-r--r--fs/nfsd/Kconfig1
-rw-r--r--fs/ocfs2/acl.c3
-rw-r--r--fs/ocfs2/cluster/heartbeat.c532
-rw-r--r--fs/ocfs2/cluster/heartbeat.h4
-rw-r--r--fs/ocfs2/cluster/masklog.h3
-rw-r--r--fs/ocfs2/cluster/nodemanager.c5
-rw-r--r--fs/ocfs2/cluster/ocfs2_nodemanager.h6
-rw-r--r--fs/ocfs2/cluster/tcp.c7
-rw-r--r--fs/ocfs2/dir.c24
-rw-r--r--fs/ocfs2/dlm/dlmcommon.h30
-rw-r--r--fs/ocfs2/dlm/dlmdebug.c13
-rw-r--r--fs/ocfs2/dlm/dlmdomain.c401
-rw-r--r--fs/ocfs2/dlm/dlmmaster.c40
-rw-r--r--fs/ocfs2/dlmglue.h1
-rw-r--r--fs/ocfs2/ocfs2.h40
-rw-r--r--fs/ocfs2/ocfs2_fs.h78
-rw-r--r--fs/ocfs2/ocfs2_ioctl.h8
-rw-r--r--fs/ocfs2/refcounttree.c5
-rw-r--r--fs/ocfs2/reservations.c22
-rw-r--r--fs/ocfs2/stack_o2cb.c2
-rw-r--r--fs/ocfs2/suballoc.c4
-rw-r--r--fs/ocfs2/super.c59
-rw-r--r--fs/ocfs2/symlink.c2
-rw-r--r--fs/ocfs2/xattr.c4
-rw-r--r--fs/proc/base.c4
-rw-r--r--fs/proc/page.c2
-rw-r--r--fs/proc/task_mmu.c4
-rw-r--r--fs/proc/vmcore.c2
-rw-r--r--fs/reiserfs/ioctl.c7
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c3
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl.c2
-rw-r--r--fs/xfs/xfs_log_cil.c12
-rw-r--r--fs/xfs/xfs_log_priv.h37
75 files changed, 1612 insertions, 864 deletions
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index 16c8a2a98c1b..899f168fd19c 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -292,9 +292,11 @@ int v9fs_dir_release(struct inode *inode, struct file *filp)
292 292
293 fid = filp->private_data; 293 fid = filp->private_data;
294 P9_DPRINTK(P9_DEBUG_VFS, 294 P9_DPRINTK(P9_DEBUG_VFS,
295 "inode: %p filp: %p fid: %d\n", inode, filp, fid->fid); 295 "v9fs_dir_release: inode: %p filp: %p fid: %d\n",
296 inode, filp, fid ? fid->fid : -1);
296 filemap_write_and_wait(inode->i_mapping); 297 filemap_write_and_wait(inode->i_mapping);
297 p9_client_clunk(fid); 298 if (fid)
299 p9_client_clunk(fid);
298 return 0; 300 return 0;
299} 301}
300 302
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index c7c23eab9440..9e670d527646 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -730,7 +730,10 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int mode,
730 P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err); 730 P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err);
731 goto error; 731 goto error;
732 } 732 }
733 dentry->d_op = &v9fs_cached_dentry_operations; 733 if (v9ses->cache)
734 dentry->d_op = &v9fs_cached_dentry_operations;
735 else
736 dentry->d_op = &v9fs_dentry_operations;
734 d_instantiate(dentry, inode); 737 d_instantiate(dentry, inode);
735 err = v9fs_fid_add(dentry, fid); 738 err = v9fs_fid_add(dentry, fid);
736 if (err < 0) 739 if (err < 0)
@@ -1128,6 +1131,7 @@ v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
1128 v9fs_stat2inode(st, dentry->d_inode, dentry->d_inode->i_sb); 1131 v9fs_stat2inode(st, dentry->d_inode, dentry->d_inode->i_sb);
1129 generic_fillattr(dentry->d_inode, stat); 1132 generic_fillattr(dentry->d_inode, stat);
1130 1133
1134 p9stat_free(st);
1131 kfree(st); 1135 kfree(st);
1132 return 0; 1136 return 0;
1133} 1137}
@@ -1489,6 +1493,7 @@ static int v9fs_readlink(struct dentry *dentry, char *buffer, int buflen)
1489 1493
1490 retval = strnlen(buffer, buflen); 1494 retval = strnlen(buffer, buflen);
1491done: 1495done:
1496 p9stat_free(st);
1492 kfree(st); 1497 kfree(st);
1493 return retval; 1498 return retval;
1494} 1499}
@@ -1942,7 +1947,7 @@ static const struct inode_operations v9fs_dir_inode_operations_dotu = {
1942 .unlink = v9fs_vfs_unlink, 1947 .unlink = v9fs_vfs_unlink,
1943 .mkdir = v9fs_vfs_mkdir, 1948 .mkdir = v9fs_vfs_mkdir,
1944 .rmdir = v9fs_vfs_rmdir, 1949 .rmdir = v9fs_vfs_rmdir,
1945 .mknod = v9fs_vfs_mknod_dotl, 1950 .mknod = v9fs_vfs_mknod,
1946 .rename = v9fs_vfs_rename, 1951 .rename = v9fs_vfs_rename,
1947 .getattr = v9fs_vfs_getattr, 1952 .getattr = v9fs_vfs_getattr,
1948 .setattr = v9fs_vfs_setattr, 1953 .setattr = v9fs_vfs_setattr,
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index f9311077de68..1d12ba0ed3db 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -122,6 +122,10 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
122 fid = v9fs_session_init(v9ses, dev_name, data); 122 fid = v9fs_session_init(v9ses, dev_name, data);
123 if (IS_ERR(fid)) { 123 if (IS_ERR(fid)) {
124 retval = PTR_ERR(fid); 124 retval = PTR_ERR(fid);
125 /*
126 * we need to call session_close to tear down some
127 * of the data structure setup by session_init
128 */
125 goto close_session; 129 goto close_session;
126 } 130 }
127 131
@@ -144,7 +148,6 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
144 retval = -ENOMEM; 148 retval = -ENOMEM;
145 goto release_sb; 149 goto release_sb;
146 } 150 }
147
148 sb->s_root = root; 151 sb->s_root = root;
149 152
150 if (v9fs_proto_dotl(v9ses)) { 153 if (v9fs_proto_dotl(v9ses)) {
@@ -152,7 +155,7 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
152 st = p9_client_getattr_dotl(fid, P9_STATS_BASIC); 155 st = p9_client_getattr_dotl(fid, P9_STATS_BASIC);
153 if (IS_ERR(st)) { 156 if (IS_ERR(st)) {
154 retval = PTR_ERR(st); 157 retval = PTR_ERR(st);
155 goto clunk_fid; 158 goto release_sb;
156 } 159 }
157 160
158 v9fs_stat2inode_dotl(st, root->d_inode); 161 v9fs_stat2inode_dotl(st, root->d_inode);
@@ -162,7 +165,7 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
162 st = p9_client_stat(fid); 165 st = p9_client_stat(fid);
163 if (IS_ERR(st)) { 166 if (IS_ERR(st)) {
164 retval = PTR_ERR(st); 167 retval = PTR_ERR(st);
165 goto clunk_fid; 168 goto release_sb;
166 } 169 }
167 170
168 root->d_inode->i_ino = v9fs_qid2ino(&st->qid); 171 root->d_inode->i_ino = v9fs_qid2ino(&st->qid);
@@ -174,19 +177,24 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
174 177
175 v9fs_fid_add(root, fid); 178 v9fs_fid_add(root, fid);
176 179
177P9_DPRINTK(P9_DEBUG_VFS, " simple set mount, return 0\n"); 180 P9_DPRINTK(P9_DEBUG_VFS, " simple set mount, return 0\n");
178 simple_set_mnt(mnt, sb); 181 simple_set_mnt(mnt, sb);
179 return 0; 182 return 0;
180 183
181clunk_fid: 184clunk_fid:
182 p9_client_clunk(fid); 185 p9_client_clunk(fid);
183
184close_session: 186close_session:
185 v9fs_session_close(v9ses); 187 v9fs_session_close(v9ses);
186 kfree(v9ses); 188 kfree(v9ses);
187 return retval; 189 return retval;
188
189release_sb: 190release_sb:
191 /*
192 * we will do the session_close and root dentry release
193 * in the below call. But we need to clunk fid, because we haven't
194 * attached the fid to dentry so it won't get clunked
195 * automatically.
196 */
197 p9_client_clunk(fid);
190 deactivate_locked_super(sb); 198 deactivate_locked_super(sb);
191 return retval; 199 return retval;
192} 200}
diff --git a/fs/aio.c b/fs/aio.c
index 3006b5bc33d6..250b0a73c8a8 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -712,8 +712,16 @@ static ssize_t aio_run_iocb(struct kiocb *iocb)
712 */ 712 */
713 ret = retry(iocb); 713 ret = retry(iocb);
714 714
715 if (ret != -EIOCBRETRY && ret != -EIOCBQUEUED) 715 if (ret != -EIOCBRETRY && ret != -EIOCBQUEUED) {
716 /*
717 * There's no easy way to restart the syscall since other AIO's
718 * may be already running. Just fail this IO with EINTR.
719 */
720 if (unlikely(ret == -ERESTARTSYS || ret == -ERESTARTNOINTR ||
721 ret == -ERESTARTNOHAND || ret == -ERESTART_RESTARTBLOCK))
722 ret = -EINTR;
716 aio_complete(iocb, ret, 0); 723 aio_complete(iocb, ret, 0);
724 }
717out: 725out:
718 spin_lock_irq(&ctx->ctx_lock); 726 spin_lock_irq(&ctx->ctx_lock);
719 727
@@ -1659,6 +1667,9 @@ long do_io_submit(aio_context_t ctx_id, long nr,
1659 if (unlikely(nr < 0)) 1667 if (unlikely(nr < 0))
1660 return -EINVAL; 1668 return -EINVAL;
1661 1669
1670 if (unlikely(nr > LONG_MAX/sizeof(*iocbpp)))
1671 nr = LONG_MAX/sizeof(*iocbpp);
1672
1662 if (unlikely(!access_ok(VERIFY_READ, iocbpp, (nr*sizeof(*iocbpp))))) 1673 if (unlikely(!access_ok(VERIFY_READ, iocbpp, (nr*sizeof(*iocbpp)))))
1663 return -EFAULT; 1674 return -EFAULT;
1664 1675
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index a7528b913936..fd0cc0bf9a40 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -724,7 +724,7 @@ static int __init init_misc_binfmt(void)
724{ 724{
725 int err = register_filesystem(&bm_fs_type); 725 int err = register_filesystem(&bm_fs_type);
726 if (!err) { 726 if (!err) {
727 err = register_binfmt(&misc_format); 727 err = insert_binfmt(&misc_format);
728 if (err) 728 if (err)
729 unregister_filesystem(&bm_fs_type); 729 unregister_filesystem(&bm_fs_type);
730 } 730 }
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c
index 612a5c38d3c1..4d0ff5ee27b8 100644
--- a/fs/bio-integrity.c
+++ b/fs/bio-integrity.c
@@ -413,10 +413,10 @@ int bio_integrity_prep(struct bio *bio)
413 413
414 /* Allocate kernel buffer for protection data */ 414 /* Allocate kernel buffer for protection data */
415 len = sectors * blk_integrity_tuple_size(bi); 415 len = sectors * blk_integrity_tuple_size(bi);
416 buf = kmalloc(len, GFP_NOIO | __GFP_NOFAIL | q->bounce_gfp); 416 buf = kmalloc(len, GFP_NOIO | q->bounce_gfp);
417 if (unlikely(buf == NULL)) { 417 if (unlikely(buf == NULL)) {
418 printk(KERN_ERR "could not allocate integrity buffer\n"); 418 printk(KERN_ERR "could not allocate integrity buffer\n");
419 return -EIO; 419 return -ENOMEM;
420 } 420 }
421 421
422 end = (((unsigned long) buf) + len + PAGE_SIZE - 1) >> PAGE_SHIFT; 422 end = (((unsigned long) buf) + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
diff --git a/fs/ceph/Kconfig b/fs/ceph/Kconfig
index bc87b9c1d27e..0fcd2640c23f 100644
--- a/fs/ceph/Kconfig
+++ b/fs/ceph/Kconfig
@@ -3,6 +3,7 @@ config CEPH_FS
3 depends on INET && EXPERIMENTAL 3 depends on INET && EXPERIMENTAL
4 select LIBCRC32C 4 select LIBCRC32C
5 select CRYPTO_AES 5 select CRYPTO_AES
6 select CRYPTO
6 help 7 help
7 Choose Y or M here to include support for mounting the 8 Choose Y or M here to include support for mounting the
8 experimental Ceph distributed file system. Ceph is an extremely 9 experimental Ceph distributed file system. Ceph is an extremely
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 4cfce1ee31fa..efbc604001c8 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -411,8 +411,8 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
411 if (i_size < page_off + len) 411 if (i_size < page_off + len)
412 len = i_size - page_off; 412 len = i_size - page_off;
413 413
414 dout("writepage %p page %p index %lu on %llu~%u\n", 414 dout("writepage %p page %p index %lu on %llu~%u snapc %p\n",
415 inode, page, page->index, page_off, len); 415 inode, page, page->index, page_off, len, snapc);
416 416
417 writeback_stat = atomic_long_inc_return(&client->writeback_count); 417 writeback_stat = atomic_long_inc_return(&client->writeback_count);
418 if (writeback_stat > 418 if (writeback_stat >
@@ -766,7 +766,8 @@ get_more_pages:
766 /* ok */ 766 /* ok */
767 if (locked_pages == 0) { 767 if (locked_pages == 0) {
768 /* prepare async write request */ 768 /* prepare async write request */
769 offset = page->index << PAGE_CACHE_SHIFT; 769 offset = (unsigned long long)page->index
770 << PAGE_CACHE_SHIFT;
770 len = wsize; 771 len = wsize;
771 req = ceph_osdc_new_request(&client->osdc, 772 req = ceph_osdc_new_request(&client->osdc,
772 &ci->i_layout, 773 &ci->i_layout,
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index a2069b6680ae..73c153092f72 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -814,7 +814,7 @@ int __ceph_caps_used(struct ceph_inode_info *ci)
814 used |= CEPH_CAP_PIN; 814 used |= CEPH_CAP_PIN;
815 if (ci->i_rd_ref) 815 if (ci->i_rd_ref)
816 used |= CEPH_CAP_FILE_RD; 816 used |= CEPH_CAP_FILE_RD;
817 if (ci->i_rdcache_ref || ci->i_rdcache_gen) 817 if (ci->i_rdcache_ref || ci->vfs_inode.i_data.nrpages)
818 used |= CEPH_CAP_FILE_CACHE; 818 used |= CEPH_CAP_FILE_CACHE;
819 if (ci->i_wr_ref) 819 if (ci->i_wr_ref)
820 used |= CEPH_CAP_FILE_WR; 820 used |= CEPH_CAP_FILE_WR;
@@ -1195,10 +1195,14 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
1195 * asynchronously back to the MDS once sync writes complete and dirty 1195 * asynchronously back to the MDS once sync writes complete and dirty
1196 * data is written out. 1196 * data is written out.
1197 * 1197 *
1198 * Unless @again is true, skip cap_snaps that were already sent to
1199 * the MDS (i.e., during this session).
1200 *
1198 * Called under i_lock. Takes s_mutex as needed. 1201 * Called under i_lock. Takes s_mutex as needed.
1199 */ 1202 */
1200void __ceph_flush_snaps(struct ceph_inode_info *ci, 1203void __ceph_flush_snaps(struct ceph_inode_info *ci,
1201 struct ceph_mds_session **psession) 1204 struct ceph_mds_session **psession,
1205 int again)
1202 __releases(ci->vfs_inode->i_lock) 1206 __releases(ci->vfs_inode->i_lock)
1203 __acquires(ci->vfs_inode->i_lock) 1207 __acquires(ci->vfs_inode->i_lock)
1204{ 1208{
@@ -1227,7 +1231,7 @@ retry:
1227 * pages to be written out. 1231 * pages to be written out.
1228 */ 1232 */
1229 if (capsnap->dirty_pages || capsnap->writing) 1233 if (capsnap->dirty_pages || capsnap->writing)
1230 continue; 1234 break;
1231 1235
1232 /* 1236 /*
1233 * if cap writeback already occurred, we should have dropped 1237 * if cap writeback already occurred, we should have dropped
@@ -1240,6 +1244,13 @@ retry:
1240 dout("no auth cap (migrating?), doing nothing\n"); 1244 dout("no auth cap (migrating?), doing nothing\n");
1241 goto out; 1245 goto out;
1242 } 1246 }
1247
1248 /* only flush each capsnap once */
1249 if (!again && !list_empty(&capsnap->flushing_item)) {
1250 dout("already flushed %p, skipping\n", capsnap);
1251 continue;
1252 }
1253
1243 mds = ci->i_auth_cap->session->s_mds; 1254 mds = ci->i_auth_cap->session->s_mds;
1244 mseq = ci->i_auth_cap->mseq; 1255 mseq = ci->i_auth_cap->mseq;
1245 1256
@@ -1276,8 +1287,8 @@ retry:
1276 &session->s_cap_snaps_flushing); 1287 &session->s_cap_snaps_flushing);
1277 spin_unlock(&inode->i_lock); 1288 spin_unlock(&inode->i_lock);
1278 1289
1279 dout("flush_snaps %p cap_snap %p follows %lld size %llu\n", 1290 dout("flush_snaps %p cap_snap %p follows %lld tid %llu\n",
1280 inode, capsnap, next_follows, capsnap->size); 1291 inode, capsnap, capsnap->follows, capsnap->flush_tid);
1281 send_cap_msg(session, ceph_vino(inode).ino, 0, 1292 send_cap_msg(session, ceph_vino(inode).ino, 0,
1282 CEPH_CAP_OP_FLUSHSNAP, capsnap->issued, 0, 1293 CEPH_CAP_OP_FLUSHSNAP, capsnap->issued, 0,
1283 capsnap->dirty, 0, capsnap->flush_tid, 0, mseq, 1294 capsnap->dirty, 0, capsnap->flush_tid, 0, mseq,
@@ -1314,7 +1325,7 @@ static void ceph_flush_snaps(struct ceph_inode_info *ci)
1314 struct inode *inode = &ci->vfs_inode; 1325 struct inode *inode = &ci->vfs_inode;
1315 1326
1316 spin_lock(&inode->i_lock); 1327 spin_lock(&inode->i_lock);
1317 __ceph_flush_snaps(ci, NULL); 1328 __ceph_flush_snaps(ci, NULL, 0);
1318 spin_unlock(&inode->i_lock); 1329 spin_unlock(&inode->i_lock);
1319} 1330}
1320 1331
@@ -1477,7 +1488,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
1477 1488
1478 /* flush snaps first time around only */ 1489 /* flush snaps first time around only */
1479 if (!list_empty(&ci->i_cap_snaps)) 1490 if (!list_empty(&ci->i_cap_snaps))
1480 __ceph_flush_snaps(ci, &session); 1491 __ceph_flush_snaps(ci, &session, 0);
1481 goto retry_locked; 1492 goto retry_locked;
1482retry: 1493retry:
1483 spin_lock(&inode->i_lock); 1494 spin_lock(&inode->i_lock);
@@ -1894,7 +1905,7 @@ static void kick_flushing_capsnaps(struct ceph_mds_client *mdsc,
1894 if (cap && cap->session == session) { 1905 if (cap && cap->session == session) {
1895 dout("kick_flushing_caps %p cap %p capsnap %p\n", inode, 1906 dout("kick_flushing_caps %p cap %p capsnap %p\n", inode,
1896 cap, capsnap); 1907 cap, capsnap);
1897 __ceph_flush_snaps(ci, &session); 1908 __ceph_flush_snaps(ci, &session, 1);
1898 } else { 1909 } else {
1899 pr_err("%p auth cap %p not mds%d ???\n", inode, 1910 pr_err("%p auth cap %p not mds%d ???\n", inode,
1900 cap, session->s_mds); 1911 cap, session->s_mds);
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 6e4f43ff23ec..a1986eb52045 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -1021,11 +1021,15 @@ out_touch:
1021static void ceph_dentry_release(struct dentry *dentry) 1021static void ceph_dentry_release(struct dentry *dentry)
1022{ 1022{
1023 struct ceph_dentry_info *di = ceph_dentry(dentry); 1023 struct ceph_dentry_info *di = ceph_dentry(dentry);
1024 struct inode *parent_inode = dentry->d_parent->d_inode; 1024 struct inode *parent_inode = NULL;
1025 u64 snapid = ceph_snap(parent_inode); 1025 u64 snapid = CEPH_NOSNAP;
1026 1026
1027 if (!IS_ROOT(dentry)) {
1028 parent_inode = dentry->d_parent->d_inode;
1029 if (parent_inode)
1030 snapid = ceph_snap(parent_inode);
1031 }
1027 dout("dentry_release %p parent %p\n", dentry, parent_inode); 1032 dout("dentry_release %p parent %p\n", dentry, parent_inode);
1028
1029 if (parent_inode && snapid != CEPH_SNAPDIR) { 1033 if (parent_inode && snapid != CEPH_SNAPDIR) {
1030 struct ceph_inode_info *ci = ceph_inode(parent_inode); 1034 struct ceph_inode_info *ci = ceph_inode(parent_inode);
1031 1035
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index e7cca414da03..62377ec37edf 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -845,7 +845,7 @@ static void ceph_set_dentry_offset(struct dentry *dn)
845 * the caller) if we fail. 845 * the caller) if we fail.
846 */ 846 */
847static struct dentry *splice_dentry(struct dentry *dn, struct inode *in, 847static struct dentry *splice_dentry(struct dentry *dn, struct inode *in,
848 bool *prehash) 848 bool *prehash, bool set_offset)
849{ 849{
850 struct dentry *realdn; 850 struct dentry *realdn;
851 851
@@ -877,7 +877,8 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in,
877 } 877 }
878 if ((!prehash || *prehash) && d_unhashed(dn)) 878 if ((!prehash || *prehash) && d_unhashed(dn))
879 d_rehash(dn); 879 d_rehash(dn);
880 ceph_set_dentry_offset(dn); 880 if (set_offset)
881 ceph_set_dentry_offset(dn);
881out: 882out:
882 return dn; 883 return dn;
883} 884}
@@ -1062,7 +1063,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
1062 d_delete(dn); 1063 d_delete(dn);
1063 goto done; 1064 goto done;
1064 } 1065 }
1065 dn = splice_dentry(dn, in, &have_lease); 1066 dn = splice_dentry(dn, in, &have_lease, true);
1066 if (IS_ERR(dn)) { 1067 if (IS_ERR(dn)) {
1067 err = PTR_ERR(dn); 1068 err = PTR_ERR(dn);
1068 goto done; 1069 goto done;
@@ -1105,7 +1106,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
1105 goto done; 1106 goto done;
1106 } 1107 }
1107 dout(" linking snapped dir %p to dn %p\n", in, dn); 1108 dout(" linking snapped dir %p to dn %p\n", in, dn);
1108 dn = splice_dentry(dn, in, NULL); 1109 dn = splice_dentry(dn, in, NULL, true);
1109 if (IS_ERR(dn)) { 1110 if (IS_ERR(dn)) {
1110 err = PTR_ERR(dn); 1111 err = PTR_ERR(dn);
1111 goto done; 1112 goto done;
@@ -1237,7 +1238,7 @@ retry_lookup:
1237 err = PTR_ERR(in); 1238 err = PTR_ERR(in);
1238 goto out; 1239 goto out;
1239 } 1240 }
1240 dn = splice_dentry(dn, in, NULL); 1241 dn = splice_dentry(dn, in, NULL, false);
1241 if (IS_ERR(dn)) 1242 if (IS_ERR(dn))
1242 dn = NULL; 1243 dn = NULL;
1243 } 1244 }
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index f091b1351786..fad95f8f2608 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -2374,6 +2374,8 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
2374 num_fcntl_locks, 2374 num_fcntl_locks,
2375 num_flock_locks); 2375 num_flock_locks);
2376 unlock_kernel(); 2376 unlock_kernel();
2377 } else {
2378 err = ceph_pagelist_append(pagelist, &rec, reclen);
2377 } 2379 }
2378 2380
2379out_free: 2381out_free:
diff --git a/fs/ceph/pagelist.c b/fs/ceph/pagelist.c
index b6859f47d364..46a368b6dce5 100644
--- a/fs/ceph/pagelist.c
+++ b/fs/ceph/pagelist.c
@@ -5,10 +5,18 @@
5 5
6#include "pagelist.h" 6#include "pagelist.h"
7 7
8static void ceph_pagelist_unmap_tail(struct ceph_pagelist *pl)
9{
10 struct page *page = list_entry(pl->head.prev, struct page,
11 lru);
12 kunmap(page);
13}
14
8int ceph_pagelist_release(struct ceph_pagelist *pl) 15int ceph_pagelist_release(struct ceph_pagelist *pl)
9{ 16{
10 if (pl->mapped_tail) 17 if (pl->mapped_tail)
11 kunmap(pl->mapped_tail); 18 ceph_pagelist_unmap_tail(pl);
19
12 while (!list_empty(&pl->head)) { 20 while (!list_empty(&pl->head)) {
13 struct page *page = list_first_entry(&pl->head, struct page, 21 struct page *page = list_first_entry(&pl->head, struct page,
14 lru); 22 lru);
@@ -26,7 +34,7 @@ static int ceph_pagelist_addpage(struct ceph_pagelist *pl)
26 pl->room += PAGE_SIZE; 34 pl->room += PAGE_SIZE;
27 list_add_tail(&page->lru, &pl->head); 35 list_add_tail(&page->lru, &pl->head);
28 if (pl->mapped_tail) 36 if (pl->mapped_tail)
29 kunmap(pl->mapped_tail); 37 ceph_pagelist_unmap_tail(pl);
30 pl->mapped_tail = kmap(page); 38 pl->mapped_tail = kmap(page);
31 return 0; 39 return 0;
32} 40}
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index 4868b9dcac5a..190b6c4a6f2b 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -119,6 +119,7 @@ static struct ceph_snap_realm *ceph_create_snap_realm(
119 INIT_LIST_HEAD(&realm->children); 119 INIT_LIST_HEAD(&realm->children);
120 INIT_LIST_HEAD(&realm->child_item); 120 INIT_LIST_HEAD(&realm->child_item);
121 INIT_LIST_HEAD(&realm->empty_item); 121 INIT_LIST_HEAD(&realm->empty_item);
122 INIT_LIST_HEAD(&realm->dirty_item);
122 INIT_LIST_HEAD(&realm->inodes_with_caps); 123 INIT_LIST_HEAD(&realm->inodes_with_caps);
123 spin_lock_init(&realm->inodes_with_caps_lock); 124 spin_lock_init(&realm->inodes_with_caps_lock);
124 __insert_snap_realm(&mdsc->snap_realms, realm); 125 __insert_snap_realm(&mdsc->snap_realms, realm);
@@ -467,7 +468,7 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
467 INIT_LIST_HEAD(&capsnap->ci_item); 468 INIT_LIST_HEAD(&capsnap->ci_item);
468 INIT_LIST_HEAD(&capsnap->flushing_item); 469 INIT_LIST_HEAD(&capsnap->flushing_item);
469 470
470 capsnap->follows = snapc->seq - 1; 471 capsnap->follows = snapc->seq;
471 capsnap->issued = __ceph_caps_issued(ci, NULL); 472 capsnap->issued = __ceph_caps_issued(ci, NULL);
472 capsnap->dirty = dirty; 473 capsnap->dirty = dirty;
473 474
@@ -604,6 +605,7 @@ int ceph_update_snap_trace(struct ceph_mds_client *mdsc,
604 struct ceph_snap_realm *realm; 605 struct ceph_snap_realm *realm;
605 int invalidate = 0; 606 int invalidate = 0;
606 int err = -ENOMEM; 607 int err = -ENOMEM;
608 LIST_HEAD(dirty_realms);
607 609
608 dout("update_snap_trace deletion=%d\n", deletion); 610 dout("update_snap_trace deletion=%d\n", deletion);
609more: 611more:
@@ -626,24 +628,6 @@ more:
626 } 628 }
627 } 629 }
628 630
629 if (le64_to_cpu(ri->seq) > realm->seq) {
630 dout("update_snap_trace updating %llx %p %lld -> %lld\n",
631 realm->ino, realm, realm->seq, le64_to_cpu(ri->seq));
632 /*
633 * if the realm seq has changed, queue a cap_snap for every
634 * inode with open caps. we do this _before_ we update
635 * the realm info so that we prepare for writeback under the
636 * _previous_ snap context.
637 *
638 * ...unless it's a snap deletion!
639 */
640 if (!deletion)
641 queue_realm_cap_snaps(realm);
642 } else {
643 dout("update_snap_trace %llx %p seq %lld unchanged\n",
644 realm->ino, realm, realm->seq);
645 }
646
647 /* ensure the parent is correct */ 631 /* ensure the parent is correct */
648 err = adjust_snap_realm_parent(mdsc, realm, le64_to_cpu(ri->parent)); 632 err = adjust_snap_realm_parent(mdsc, realm, le64_to_cpu(ri->parent));
649 if (err < 0) 633 if (err < 0)
@@ -651,6 +635,8 @@ more:
651 invalidate += err; 635 invalidate += err;
652 636
653 if (le64_to_cpu(ri->seq) > realm->seq) { 637 if (le64_to_cpu(ri->seq) > realm->seq) {
638 dout("update_snap_trace updating %llx %p %lld -> %lld\n",
639 realm->ino, realm, realm->seq, le64_to_cpu(ri->seq));
654 /* update realm parameters, snap lists */ 640 /* update realm parameters, snap lists */
655 realm->seq = le64_to_cpu(ri->seq); 641 realm->seq = le64_to_cpu(ri->seq);
656 realm->created = le64_to_cpu(ri->created); 642 realm->created = le64_to_cpu(ri->created);
@@ -668,9 +654,17 @@ more:
668 if (err < 0) 654 if (err < 0)
669 goto fail; 655 goto fail;
670 656
657 /* queue realm for cap_snap creation */
658 list_add(&realm->dirty_item, &dirty_realms);
659
671 invalidate = 1; 660 invalidate = 1;
672 } else if (!realm->cached_context) { 661 } else if (!realm->cached_context) {
662 dout("update_snap_trace %llx %p seq %lld new\n",
663 realm->ino, realm, realm->seq);
673 invalidate = 1; 664 invalidate = 1;
665 } else {
666 dout("update_snap_trace %llx %p seq %lld unchanged\n",
667 realm->ino, realm, realm->seq);
674 } 668 }
675 669
676 dout("done with %llx %p, invalidated=%d, %p %p\n", realm->ino, 670 dout("done with %llx %p, invalidated=%d, %p %p\n", realm->ino,
@@ -683,6 +677,14 @@ more:
683 if (invalidate) 677 if (invalidate)
684 rebuild_snap_realms(realm); 678 rebuild_snap_realms(realm);
685 679
680 /*
681 * queue cap snaps _after_ we've built the new snap contexts,
682 * so that i_head_snapc can be set appropriately.
683 */
684 list_for_each_entry(realm, &dirty_realms, dirty_item) {
685 queue_realm_cap_snaps(realm);
686 }
687
686 __cleanup_empty_realms(mdsc); 688 __cleanup_empty_realms(mdsc);
687 return 0; 689 return 0;
688 690
@@ -715,7 +717,7 @@ static void flush_snaps(struct ceph_mds_client *mdsc)
715 igrab(inode); 717 igrab(inode);
716 spin_unlock(&mdsc->snap_flush_lock); 718 spin_unlock(&mdsc->snap_flush_lock);
717 spin_lock(&inode->i_lock); 719 spin_lock(&inode->i_lock);
718 __ceph_flush_snaps(ci, &session); 720 __ceph_flush_snaps(ci, &session, 0);
719 spin_unlock(&inode->i_lock); 721 spin_unlock(&inode->i_lock);
720 iput(inode); 722 iput(inode);
721 spin_lock(&mdsc->snap_flush_lock); 723 spin_lock(&mdsc->snap_flush_lock);
@@ -816,6 +818,7 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
816 }; 818 };
817 struct inode *inode = ceph_find_inode(sb, vino); 819 struct inode *inode = ceph_find_inode(sb, vino);
818 struct ceph_inode_info *ci; 820 struct ceph_inode_info *ci;
821 struct ceph_snap_realm *oldrealm;
819 822
820 if (!inode) 823 if (!inode)
821 continue; 824 continue;
@@ -841,18 +844,19 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
841 dout(" will move %p to split realm %llx %p\n", 844 dout(" will move %p to split realm %llx %p\n",
842 inode, realm->ino, realm); 845 inode, realm->ino, realm);
843 /* 846 /*
844 * Remove the inode from the realm's inode 847 * Move the inode to the new realm
845 * list, but don't add it to the new realm
846 * yet. We don't want the cap_snap to be
847 * queued (again) by ceph_update_snap_trace()
848 * below. Queue it _now_, under the old context.
849 */ 848 */
850 spin_lock(&realm->inodes_with_caps_lock); 849 spin_lock(&realm->inodes_with_caps_lock);
851 list_del_init(&ci->i_snap_realm_item); 850 list_del_init(&ci->i_snap_realm_item);
851 list_add(&ci->i_snap_realm_item,
852 &realm->inodes_with_caps);
853 oldrealm = ci->i_snap_realm;
854 ci->i_snap_realm = realm;
852 spin_unlock(&realm->inodes_with_caps_lock); 855 spin_unlock(&realm->inodes_with_caps_lock);
853 spin_unlock(&inode->i_lock); 856 spin_unlock(&inode->i_lock);
854 857
855 ceph_queue_cap_snap(ci); 858 ceph_get_snap_realm(mdsc, realm);
859 ceph_put_snap_realm(mdsc, oldrealm);
856 860
857 iput(inode); 861 iput(inode);
858 continue; 862 continue;
@@ -880,43 +884,9 @@ skip_inode:
880 ceph_update_snap_trace(mdsc, p, e, 884 ceph_update_snap_trace(mdsc, p, e,
881 op == CEPH_SNAP_OP_DESTROY); 885 op == CEPH_SNAP_OP_DESTROY);
882 886
883 if (op == CEPH_SNAP_OP_SPLIT) { 887 if (op == CEPH_SNAP_OP_SPLIT)
884 /*
885 * ok, _now_ add the inodes into the new realm.
886 */
887 for (i = 0; i < num_split_inos; i++) {
888 struct ceph_vino vino = {
889 .ino = le64_to_cpu(split_inos[i]),
890 .snap = CEPH_NOSNAP,
891 };
892 struct inode *inode = ceph_find_inode(sb, vino);
893 struct ceph_inode_info *ci;
894
895 if (!inode)
896 continue;
897 ci = ceph_inode(inode);
898 spin_lock(&inode->i_lock);
899 if (list_empty(&ci->i_snap_realm_item)) {
900 struct ceph_snap_realm *oldrealm =
901 ci->i_snap_realm;
902
903 dout(" moving %p to split realm %llx %p\n",
904 inode, realm->ino, realm);
905 spin_lock(&realm->inodes_with_caps_lock);
906 list_add(&ci->i_snap_realm_item,
907 &realm->inodes_with_caps);
908 ci->i_snap_realm = realm;
909 spin_unlock(&realm->inodes_with_caps_lock);
910 ceph_get_snap_realm(mdsc, realm);
911 ceph_put_snap_realm(mdsc, oldrealm);
912 }
913 spin_unlock(&inode->i_lock);
914 iput(inode);
915 }
916
917 /* we took a reference when we created the realm, above */ 888 /* we took a reference when we created the realm, above */
918 ceph_put_snap_realm(mdsc, realm); 889 ceph_put_snap_realm(mdsc, realm);
919 }
920 890
921 __cleanup_empty_realms(mdsc); 891 __cleanup_empty_realms(mdsc);
922 892
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index c33897ae5725..b87638e84c4b 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -690,6 +690,8 @@ struct ceph_snap_realm {
690 690
691 struct list_head empty_item; /* if i have ref==0 */ 691 struct list_head empty_item; /* if i have ref==0 */
692 692
693 struct list_head dirty_item; /* if realm needs new context */
694
693 /* the current set of snaps for this realm */ 695 /* the current set of snaps for this realm */
694 struct ceph_snap_context *cached_context; 696 struct ceph_snap_context *cached_context;
695 697
@@ -826,7 +828,8 @@ extern void ceph_put_cap_refs(struct ceph_inode_info *ci, int had);
826extern void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr, 828extern void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
827 struct ceph_snap_context *snapc); 829 struct ceph_snap_context *snapc);
828extern void __ceph_flush_snaps(struct ceph_inode_info *ci, 830extern void __ceph_flush_snaps(struct ceph_inode_info *ci,
829 struct ceph_mds_session **psession); 831 struct ceph_mds_session **psession,
832 int again);
830extern void ceph_check_caps(struct ceph_inode_info *ci, int flags, 833extern void ceph_check_caps(struct ceph_inode_info *ci, int flags,
831 struct ceph_mds_session *session); 834 struct ceph_mds_session *session);
832extern void ceph_check_delayed_caps(struct ceph_mds_client *mdsc); 835extern void ceph_check_delayed_caps(struct ceph_mds_client *mdsc);
diff --git a/fs/char_dev.c b/fs/char_dev.c
index f80a4f25123c..143d393881cb 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -40,7 +40,9 @@ struct backing_dev_info directly_mappable_cdev_bdi = {
40#endif 40#endif
41 /* permit direct mmap, for read, write or exec */ 41 /* permit direct mmap, for read, write or exec */
42 BDI_CAP_MAP_DIRECT | 42 BDI_CAP_MAP_DIRECT |
43 BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP), 43 BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP |
44 /* no writeback happens */
45 BDI_CAP_NO_ACCT_AND_WRITEBACK),
44}; 46};
45 47
46static struct kobj_map *cdev_map; 48static struct kobj_map *cdev_map;
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
index 0da1debd499d..917b7d449bb2 100644
--- a/fs/cifs/Kconfig
+++ b/fs/cifs/Kconfig
@@ -2,8 +2,6 @@ config CIFS
2 tristate "CIFS support (advanced network filesystem, SMBFS successor)" 2 tristate "CIFS support (advanced network filesystem, SMBFS successor)"
3 depends on INET 3 depends on INET
4 select NLS 4 select NLS
5 select CRYPTO_MD5
6 select CRYPTO_ARC4
7 help 5 help
8 This is the client VFS module for the Common Internet File System 6 This is the client VFS module for the Common Internet File System
9 (CIFS) protocol which is the successor to the Server Message Block 7 (CIFS) protocol which is the successor to the Server Message Block
diff --git a/fs/cifs/asn1.c b/fs/cifs/asn1.c
index 21f0fbd86989..cfd1ce34e0bc 100644
--- a/fs/cifs/asn1.c
+++ b/fs/cifs/asn1.c
@@ -597,13 +597,13 @@ decode_negTokenInit(unsigned char *security_blob, int length,
597 if (compare_oid(oid, oidlen, MSKRB5_OID, 597 if (compare_oid(oid, oidlen, MSKRB5_OID,
598 MSKRB5_OID_LEN)) 598 MSKRB5_OID_LEN))
599 server->sec_mskerberos = true; 599 server->sec_mskerberos = true;
600 if (compare_oid(oid, oidlen, KRB5U2U_OID, 600 else if (compare_oid(oid, oidlen, KRB5U2U_OID,
601 KRB5U2U_OID_LEN)) 601 KRB5U2U_OID_LEN))
602 server->sec_kerberosu2u = true; 602 server->sec_kerberosu2u = true;
603 if (compare_oid(oid, oidlen, KRB5_OID, 603 else if (compare_oid(oid, oidlen, KRB5_OID,
604 KRB5_OID_LEN)) 604 KRB5_OID_LEN))
605 server->sec_kerberos = true; 605 server->sec_kerberos = true;
606 if (compare_oid(oid, oidlen, NTLMSSP_OID, 606 else if (compare_oid(oid, oidlen, NTLMSSP_OID,
607 NTLMSSP_OID_LEN)) 607 NTLMSSP_OID_LEN))
608 server->sec_ntlmssp = true; 608 server->sec_ntlmssp = true;
609 609
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index 709f2296bdb4..35042d8f7338 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -27,7 +27,6 @@
27#include "md5.h" 27#include "md5.h"
28#include "cifs_unicode.h" 28#include "cifs_unicode.h"
29#include "cifsproto.h" 29#include "cifsproto.h"
30#include "ntlmssp.h"
31#include <linux/ctype.h> 30#include <linux/ctype.h>
32#include <linux/random.h> 31#include <linux/random.h>
33 32
@@ -43,43 +42,21 @@ extern void SMBencrypt(unsigned char *passwd, const unsigned char *c8,
43 unsigned char *p24); 42 unsigned char *p24);
44 43
45static int cifs_calculate_signature(const struct smb_hdr *cifs_pdu, 44static int cifs_calculate_signature(const struct smb_hdr *cifs_pdu,
46 struct TCP_Server_Info *server, char *signature) 45 const struct mac_key *key, char *signature)
47{ 46{
48 int rc; 47 struct MD5Context context;
49 48
50 if (cifs_pdu == NULL || server == NULL || signature == NULL) 49 if ((cifs_pdu == NULL) || (signature == NULL) || (key == NULL))
51 return -EINVAL; 50 return -EINVAL;
52 51
53 if (!server->ntlmssp.sdescmd5) { 52 cifs_MD5_init(&context);
54 cERROR(1, 53 cifs_MD5_update(&context, (char *)&key->data, key->len);
55 "cifs_calculate_signature: can't generate signature\n"); 54 cifs_MD5_update(&context, cifs_pdu->Protocol, cifs_pdu->smb_buf_length);
56 return -1;
57 }
58
59 rc = crypto_shash_init(&server->ntlmssp.sdescmd5->shash);
60 if (rc) {
61 cERROR(1, "cifs_calculate_signature: oould not init md5\n");
62 return rc;
63 }
64
65 if (server->secType == RawNTLMSSP)
66 crypto_shash_update(&server->ntlmssp.sdescmd5->shash,
67 server->session_key.data.ntlmv2.key,
68 CIFS_NTLMV2_SESSKEY_SIZE);
69 else
70 crypto_shash_update(&server->ntlmssp.sdescmd5->shash,
71 (char *)&server->session_key.data,
72 server->session_key.len);
73
74 crypto_shash_update(&server->ntlmssp.sdescmd5->shash,
75 cifs_pdu->Protocol, cifs_pdu->smb_buf_length);
76 55
77 rc = crypto_shash_final(&server->ntlmssp.sdescmd5->shash, signature); 56 cifs_MD5_final(signature, &context);
78 57 return 0;
79 return rc;
80} 58}
81 59
82
83int cifs_sign_smb(struct smb_hdr *cifs_pdu, struct TCP_Server_Info *server, 60int cifs_sign_smb(struct smb_hdr *cifs_pdu, struct TCP_Server_Info *server,
84 __u32 *pexpected_response_sequence_number) 61 __u32 *pexpected_response_sequence_number)
85{ 62{
@@ -101,7 +78,8 @@ int cifs_sign_smb(struct smb_hdr *cifs_pdu, struct TCP_Server_Info *server,
101 server->sequence_number++; 78 server->sequence_number++;
102 spin_unlock(&GlobalMid_Lock); 79 spin_unlock(&GlobalMid_Lock);
103 80
104 rc = cifs_calculate_signature(cifs_pdu, server, smb_signature); 81 rc = cifs_calculate_signature(cifs_pdu, &server->mac_signing_key,
82 smb_signature);
105 if (rc) 83 if (rc)
106 memset(cifs_pdu->Signature.SecuritySignature, 0, 8); 84 memset(cifs_pdu->Signature.SecuritySignature, 0, 8);
107 else 85 else
@@ -111,39 +89,21 @@ int cifs_sign_smb(struct smb_hdr *cifs_pdu, struct TCP_Server_Info *server,
111} 89}
112 90
113static int cifs_calc_signature2(const struct kvec *iov, int n_vec, 91static int cifs_calc_signature2(const struct kvec *iov, int n_vec,
114 struct TCP_Server_Info *server, char *signature) 92 const struct mac_key *key, char *signature)
115{ 93{
94 struct MD5Context context;
116 int i; 95 int i;
117 int rc;
118 96
119 if (iov == NULL || server == NULL || signature == NULL) 97 if ((iov == NULL) || (signature == NULL) || (key == NULL))
120 return -EINVAL; 98 return -EINVAL;
121 99
122 if (!server->ntlmssp.sdescmd5) { 100 cifs_MD5_init(&context);
123 cERROR(1, "cifs_calc_signature2: can't generate signature\n"); 101 cifs_MD5_update(&context, (char *)&key->data, key->len);
124 return -1;
125 }
126
127 rc = crypto_shash_init(&server->ntlmssp.sdescmd5->shash);
128 if (rc) {
129 cERROR(1, "cifs_calc_signature2: oould not init md5\n");
130 return rc;
131 }
132
133 if (server->secType == RawNTLMSSP)
134 crypto_shash_update(&server->ntlmssp.sdescmd5->shash,
135 server->session_key.data.ntlmv2.key,
136 CIFS_NTLMV2_SESSKEY_SIZE);
137 else
138 crypto_shash_update(&server->ntlmssp.sdescmd5->shash,
139 (char *)&server->session_key.data,
140 server->session_key.len);
141
142 for (i = 0; i < n_vec; i++) { 102 for (i = 0; i < n_vec; i++) {
143 if (iov[i].iov_len == 0) 103 if (iov[i].iov_len == 0)
144 continue; 104 continue;
145 if (iov[i].iov_base == NULL) { 105 if (iov[i].iov_base == NULL) {
146 cERROR(1, "cifs_calc_signature2: null iovec entry"); 106 cERROR(1, "null iovec entry");
147 return -EIO; 107 return -EIO;
148 } 108 }
149 /* The first entry includes a length field (which does not get 109 /* The first entry includes a length field (which does not get
@@ -151,18 +111,18 @@ static int cifs_calc_signature2(const struct kvec *iov, int n_vec,
151 if (i == 0) { 111 if (i == 0) {
152 if (iov[0].iov_len <= 8) /* cmd field at offset 9 */ 112 if (iov[0].iov_len <= 8) /* cmd field at offset 9 */
153 break; /* nothing to sign or corrupt header */ 113 break; /* nothing to sign or corrupt header */
154 crypto_shash_update(&server->ntlmssp.sdescmd5->shash, 114 cifs_MD5_update(&context, iov[0].iov_base+4,
155 iov[i].iov_base + 4, iov[i].iov_len - 4); 115 iov[0].iov_len-4);
156 } else 116 } else
157 crypto_shash_update(&server->ntlmssp.sdescmd5->shash, 117 cifs_MD5_update(&context, iov[i].iov_base, iov[i].iov_len);
158 iov[i].iov_base, iov[i].iov_len);
159 } 118 }
160 119
161 rc = crypto_shash_final(&server->ntlmssp.sdescmd5->shash, signature); 120 cifs_MD5_final(signature, &context);
162 121
163 return rc; 122 return 0;
164} 123}
165 124
125
166int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *server, 126int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *server,
167 __u32 *pexpected_response_sequence_number) 127 __u32 *pexpected_response_sequence_number)
168{ 128{
@@ -185,7 +145,8 @@ int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *server,
185 server->sequence_number++; 145 server->sequence_number++;
186 spin_unlock(&GlobalMid_Lock); 146 spin_unlock(&GlobalMid_Lock);
187 147
188 rc = cifs_calc_signature2(iov, n_vec, server, smb_signature); 148 rc = cifs_calc_signature2(iov, n_vec, &server->mac_signing_key,
149 smb_signature);
189 if (rc) 150 if (rc)
190 memset(cifs_pdu->Signature.SecuritySignature, 0, 8); 151 memset(cifs_pdu->Signature.SecuritySignature, 0, 8);
191 else 152 else
@@ -195,14 +156,14 @@ int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *server,
195} 156}
196 157
197int cifs_verify_signature(struct smb_hdr *cifs_pdu, 158int cifs_verify_signature(struct smb_hdr *cifs_pdu,
198 struct TCP_Server_Info *server, 159 const struct mac_key *mac_key,
199 __u32 expected_sequence_number) 160 __u32 expected_sequence_number)
200{ 161{
201 int rc; 162 unsigned int rc;
202 char server_response_sig[8]; 163 char server_response_sig[8];
203 char what_we_think_sig_should_be[20]; 164 char what_we_think_sig_should_be[20];
204 165
205 if (cifs_pdu == NULL || server == NULL) 166 if ((cifs_pdu == NULL) || (mac_key == NULL))
206 return -EINVAL; 167 return -EINVAL;
207 168
208 if (cifs_pdu->Command == SMB_COM_NEGOTIATE) 169 if (cifs_pdu->Command == SMB_COM_NEGOTIATE)
@@ -231,7 +192,7 @@ int cifs_verify_signature(struct smb_hdr *cifs_pdu,
231 cpu_to_le32(expected_sequence_number); 192 cpu_to_le32(expected_sequence_number);
232 cifs_pdu->Signature.Sequence.Reserved = 0; 193 cifs_pdu->Signature.Sequence.Reserved = 0;
233 194
234 rc = cifs_calculate_signature(cifs_pdu, server, 195 rc = cifs_calculate_signature(cifs_pdu, mac_key,
235 what_we_think_sig_should_be); 196 what_we_think_sig_should_be);
236 197
237 if (rc) 198 if (rc)
@@ -248,7 +209,7 @@ int cifs_verify_signature(struct smb_hdr *cifs_pdu,
248} 209}
249 210
250/* We fill in key by putting in 40 byte array which was allocated by caller */ 211/* We fill in key by putting in 40 byte array which was allocated by caller */
251int cifs_calculate_session_key(struct session_key *key, const char *rn, 212int cifs_calculate_mac_key(struct mac_key *key, const char *rn,
252 const char *password) 213 const char *password)
253{ 214{
254 char temp_key[16]; 215 char temp_key[16];
@@ -306,52 +267,38 @@ static int calc_ntlmv2_hash(struct cifsSesInfo *ses,
306{ 267{
307 int rc = 0; 268 int rc = 0;
308 int len; 269 int len;
309 char nt_hash[CIFS_NTHASH_SIZE]; 270 char nt_hash[16];
271 struct HMACMD5Context *pctxt;
310 wchar_t *user; 272 wchar_t *user;
311 wchar_t *domain; 273 wchar_t *domain;
312 wchar_t *server;
313 274
314 if (!ses->server->ntlmssp.sdeschmacmd5) { 275 pctxt = kmalloc(sizeof(struct HMACMD5Context), GFP_KERNEL);
315 cERROR(1, "calc_ntlmv2_hash: can't generate ntlmv2 hash\n"); 276
316 return -1; 277 if (pctxt == NULL)
317 } 278 return -ENOMEM;
318 279
319 /* calculate md4 hash of password */ 280 /* calculate md4 hash of password */
320 E_md4hash(ses->password, nt_hash); 281 E_md4hash(ses->password, nt_hash);
321 282
322 crypto_shash_setkey(ses->server->ntlmssp.hmacmd5, nt_hash, 283 /* convert Domainname to unicode and uppercase */
323 CIFS_NTHASH_SIZE); 284 hmac_md5_init_limK_to_64(nt_hash, 16, pctxt);
324
325 rc = crypto_shash_init(&ses->server->ntlmssp.sdeschmacmd5->shash);
326 if (rc) {
327 cERROR(1, "calc_ntlmv2_hash: could not init hmacmd5\n");
328 return rc;
329 }
330 285
331 /* convert ses->userName to unicode and uppercase */ 286 /* convert ses->userName to unicode and uppercase */
332 len = strlen(ses->userName); 287 len = strlen(ses->userName);
333 user = kmalloc(2 + (len * 2), GFP_KERNEL); 288 user = kmalloc(2 + (len * 2), GFP_KERNEL);
334 if (user == NULL) { 289 if (user == NULL)
335 cERROR(1, "calc_ntlmv2_hash: user mem alloc failure\n");
336 rc = -ENOMEM;
337 goto calc_exit_2; 290 goto calc_exit_2;
338 }
339 len = cifs_strtoUCS((__le16 *)user, ses->userName, len, nls_cp); 291 len = cifs_strtoUCS((__le16 *)user, ses->userName, len, nls_cp);
340 UniStrupr(user); 292 UniStrupr(user);
341 293 hmac_md5_update((char *)user, 2*len, pctxt);
342 crypto_shash_update(&ses->server->ntlmssp.sdeschmacmd5->shash,
343 (char *)user, 2 * len);
344 294
345 /* convert ses->domainName to unicode and uppercase */ 295 /* convert ses->domainName to unicode and uppercase */
346 if (ses->domainName) { 296 if (ses->domainName) {
347 len = strlen(ses->domainName); 297 len = strlen(ses->domainName);
348 298
349 domain = kmalloc(2 + (len * 2), GFP_KERNEL); 299 domain = kmalloc(2 + (len * 2), GFP_KERNEL);
350 if (domain == NULL) { 300 if (domain == NULL)
351 cERROR(1, "calc_ntlmv2_hash: domain mem alloc failure");
352 rc = -ENOMEM;
353 goto calc_exit_1; 301 goto calc_exit_1;
354 }
355 len = cifs_strtoUCS((__le16 *)domain, ses->domainName, len, 302 len = cifs_strtoUCS((__le16 *)domain, ses->domainName, len,
356 nls_cp); 303 nls_cp);
357 /* the following line was removed since it didn't work well 304 /* the following line was removed since it didn't work well
@@ -359,292 +306,65 @@ static int calc_ntlmv2_hash(struct cifsSesInfo *ses,
359 Maybe converting the domain name earlier makes sense */ 306 Maybe converting the domain name earlier makes sense */
360 /* UniStrupr(domain); */ 307 /* UniStrupr(domain); */
361 308
362 crypto_shash_update(&ses->server->ntlmssp.sdeschmacmd5->shash, 309 hmac_md5_update((char *)domain, 2*len, pctxt);
363 (char *)domain, 2 * len);
364 310
365 kfree(domain); 311 kfree(domain);
366 } else if (ses->serverName) {
367 len = strlen(ses->serverName);
368
369 server = kmalloc(2 + (len * 2), GFP_KERNEL);
370 if (server == NULL) {
371 cERROR(1, "calc_ntlmv2_hash: server mem alloc failure");
372 rc = -ENOMEM;
373 goto calc_exit_1;
374 }
375 len = cifs_strtoUCS((__le16 *)server, ses->serverName, len,
376 nls_cp);
377 /* the following line was removed since it didn't work well
378 with lower cased domain name that passed as an option.
379 Maybe converting the domain name earlier makes sense */
380 /* UniStrupr(domain); */
381
382 crypto_shash_update(&ses->server->ntlmssp.sdeschmacmd5->shash,
383 (char *)server, 2 * len);
384
385 kfree(server);
386 } 312 }
387
388 rc = crypto_shash_final(&ses->server->ntlmssp.sdeschmacmd5->shash,
389 ses->server->ntlmv2_hash);
390
391calc_exit_1: 313calc_exit_1:
392 kfree(user); 314 kfree(user);
393calc_exit_2: 315calc_exit_2:
394 /* BB FIXME what about bytes 24 through 40 of the signing key? 316 /* BB FIXME what about bytes 24 through 40 of the signing key?
395 compare with the NTLM example */ 317 compare with the NTLM example */
318 hmac_md5_final(ses->server->ntlmv2_hash, pctxt);
396 319
320 kfree(pctxt);
397 return rc; 321 return rc;
398} 322}
399 323
400static int 324void setup_ntlmv2_rsp(struct cifsSesInfo *ses, char *resp_buf,
401find_domain_name(struct cifsSesInfo *ses)
402{
403 int rc = 0;
404 unsigned int attrsize;
405 unsigned int type;
406 unsigned char *blobptr;
407 struct ntlmssp2_name *attrptr;
408
409 if (ses->server->tiblob) {
410 blobptr = ses->server->tiblob;
411 attrptr = (struct ntlmssp2_name *) blobptr;
412
413 while ((type = attrptr->type) != 0) {
414 blobptr += 2; /* advance attr type */
415 attrsize = attrptr->length;
416 blobptr += 2; /* advance attr size */
417 if (type == NTLMSSP_AV_NB_DOMAIN_NAME) {
418 if (!ses->domainName) {
419 ses->domainName =
420 kmalloc(attrptr->length + 1,
421 GFP_KERNEL);
422 if (!ses->domainName)
423 return -ENOMEM;
424 cifs_from_ucs2(ses->domainName,
425 (__le16 *)blobptr,
426 attrptr->length,
427 attrptr->length,
428 load_nls_default(), false);
429 }
430 }
431 blobptr += attrsize; /* advance attr value */
432 attrptr = (struct ntlmssp2_name *) blobptr;
433 }
434 } else {
435 ses->server->tilen = 2 * sizeof(struct ntlmssp2_name);
436 ses->server->tiblob = kmalloc(ses->server->tilen, GFP_KERNEL);
437 if (!ses->server->tiblob) {
438 ses->server->tilen = 0;
439 cERROR(1, "Challenge target info allocation failure");
440 return -ENOMEM;
441 }
442 memset(ses->server->tiblob, 0x0, ses->server->tilen);
443 attrptr = (struct ntlmssp2_name *) ses->server->tiblob;
444 attrptr->type = cpu_to_le16(NTLMSSP_DOMAIN_TYPE);
445 }
446
447 return rc;
448}
449
450static int
451CalcNTLMv2_response(const struct TCP_Server_Info *server,
452 char *v2_session_response)
453{
454 int rc;
455
456 if (!server->ntlmssp.sdeschmacmd5) {
457 cERROR(1, "calc_ntlmv2_hash: can't generate ntlmv2 hash\n");
458 return -1;
459 }
460
461 crypto_shash_setkey(server->ntlmssp.hmacmd5, server->ntlmv2_hash,
462 CIFS_HMAC_MD5_HASH_SIZE);
463
464 rc = crypto_shash_init(&server->ntlmssp.sdeschmacmd5->shash);
465 if (rc) {
466 cERROR(1, "CalcNTLMv2_response: could not init hmacmd5");
467 return rc;
468 }
469
470 memcpy(v2_session_response + CIFS_SERVER_CHALLENGE_SIZE,
471 server->cryptKey, CIFS_SERVER_CHALLENGE_SIZE);
472 crypto_shash_update(&server->ntlmssp.sdeschmacmd5->shash,
473 v2_session_response + CIFS_SERVER_CHALLENGE_SIZE,
474 sizeof(struct ntlmv2_resp) - CIFS_SERVER_CHALLENGE_SIZE);
475
476 if (server->tilen)
477 crypto_shash_update(&server->ntlmssp.sdeschmacmd5->shash,
478 server->tiblob, server->tilen);
479
480 rc = crypto_shash_final(&server->ntlmssp.sdeschmacmd5->shash,
481 v2_session_response);
482
483 return rc;
484}
485
486int
487setup_ntlmv2_rsp(struct cifsSesInfo *ses, char *resp_buf,
488 const struct nls_table *nls_cp) 325 const struct nls_table *nls_cp)
489{ 326{
490 int rc = 0; 327 int rc;
491 struct ntlmv2_resp *buf = (struct ntlmv2_resp *)resp_buf; 328 struct ntlmv2_resp *buf = (struct ntlmv2_resp *)resp_buf;
329 struct HMACMD5Context context;
492 330
493 buf->blob_signature = cpu_to_le32(0x00000101); 331 buf->blob_signature = cpu_to_le32(0x00000101);
494 buf->reserved = 0; 332 buf->reserved = 0;
495 buf->time = cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME)); 333 buf->time = cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME));
496 get_random_bytes(&buf->client_chal, sizeof(buf->client_chal)); 334 get_random_bytes(&buf->client_chal, sizeof(buf->client_chal));
497 buf->reserved2 = 0; 335 buf->reserved2 = 0;
498 336 buf->names[0].type = cpu_to_le16(NTLMSSP_DOMAIN_TYPE);
499 if (!ses->domainName) { 337 buf->names[0].length = 0;
500 rc = find_domain_name(ses); 338 buf->names[1].type = 0;
501 if (rc) { 339 buf->names[1].length = 0;
502 cERROR(1, "could not get domain/server name rc %d", rc);
503 return rc;
504 }
505 }
506 340
507 /* calculate buf->ntlmv2_hash */ 341 /* calculate buf->ntlmv2_hash */
508 rc = calc_ntlmv2_hash(ses, nls_cp); 342 rc = calc_ntlmv2_hash(ses, nls_cp);
509 if (rc) { 343 if (rc)
510 cERROR(1, "could not get v2 hash rc %d", rc);
511 return rc;
512 }
513 rc = CalcNTLMv2_response(ses->server, resp_buf);
514 if (rc) {
515 cERROR(1, "could not get v2 hash rc %d", rc); 344 cERROR(1, "could not get v2 hash rc %d", rc);
516 return rc; 345 CalcNTLMv2_response(ses, resp_buf);
517 }
518
519 if (!ses->server->ntlmssp.sdeschmacmd5) {
520 cERROR(1, "calc_ntlmv2_hash: can't generate ntlmv2 hash\n");
521 return -1;
522 }
523
524 crypto_shash_setkey(ses->server->ntlmssp.hmacmd5,
525 ses->server->ntlmv2_hash, CIFS_HMAC_MD5_HASH_SIZE);
526 346
527 rc = crypto_shash_init(&ses->server->ntlmssp.sdeschmacmd5->shash); 347 /* now calculate the MAC key for NTLMv2 */
528 if (rc) { 348 hmac_md5_init_limK_to_64(ses->server->ntlmv2_hash, 16, &context);
529 cERROR(1, "setup_ntlmv2_rsp: could not init hmacmd5\n"); 349 hmac_md5_update(resp_buf, 16, &context);
530 return rc; 350 hmac_md5_final(ses->server->mac_signing_key.data.ntlmv2.key, &context);
531 }
532 351
533 crypto_shash_update(&ses->server->ntlmssp.sdeschmacmd5->shash, 352 memcpy(&ses->server->mac_signing_key.data.ntlmv2.resp, resp_buf,
534 resp_buf, CIFS_HMAC_MD5_HASH_SIZE); 353 sizeof(struct ntlmv2_resp));
535 354 ses->server->mac_signing_key.len = 16 + sizeof(struct ntlmv2_resp);
536 rc = crypto_shash_final(&ses->server->ntlmssp.sdeschmacmd5->shash,
537 ses->server->session_key.data.ntlmv2.key);
538
539 memcpy(&ses->server->session_key.data.ntlmv2.resp, resp_buf,
540 sizeof(struct ntlmv2_resp));
541 ses->server->session_key.len = 16 + sizeof(struct ntlmv2_resp);
542
543 return rc;
544} 355}
545 356
546int 357void CalcNTLMv2_response(const struct cifsSesInfo *ses,
547calc_seckey(struct TCP_Server_Info *server) 358 char *v2_session_response)
548{
549 int rc;
550 unsigned char sec_key[CIFS_NTLMV2_SESSKEY_SIZE];
551 struct crypto_blkcipher *tfm_arc4;
552 struct scatterlist sgin, sgout;
553 struct blkcipher_desc desc;
554
555 get_random_bytes(sec_key, CIFS_NTLMV2_SESSKEY_SIZE);
556
557 tfm_arc4 = crypto_alloc_blkcipher("ecb(arc4)",
558 0, CRYPTO_ALG_ASYNC);
559 if (!tfm_arc4 || IS_ERR(tfm_arc4)) {
560 cERROR(1, "could not allocate " "master crypto API arc4\n");
561 return 1;
562 }
563
564 desc.tfm = tfm_arc4;
565
566 crypto_blkcipher_setkey(tfm_arc4,
567 server->session_key.data.ntlmv2.key, CIFS_CPHTXT_SIZE);
568 sg_init_one(&sgin, sec_key, CIFS_CPHTXT_SIZE);
569 sg_init_one(&sgout, server->ntlmssp.ciphertext, CIFS_CPHTXT_SIZE);
570 rc = crypto_blkcipher_encrypt(&desc, &sgout, &sgin, CIFS_CPHTXT_SIZE);
571
572 if (!rc)
573 memcpy(server->session_key.data.ntlmv2.key,
574 sec_key, CIFS_NTLMV2_SESSKEY_SIZE);
575
576 crypto_free_blkcipher(tfm_arc4);
577
578 return 0;
579}
580
581void
582cifs_crypto_shash_release(struct TCP_Server_Info *server)
583{
584 if (server->ntlmssp.md5)
585 crypto_free_shash(server->ntlmssp.md5);
586
587 if (server->ntlmssp.hmacmd5)
588 crypto_free_shash(server->ntlmssp.hmacmd5);
589
590 kfree(server->ntlmssp.sdeschmacmd5);
591
592 kfree(server->ntlmssp.sdescmd5);
593}
594
595int
596cifs_crypto_shash_allocate(struct TCP_Server_Info *server)
597{ 359{
598 int rc; 360 struct HMACMD5Context context;
599 unsigned int size; 361 /* rest of v2 struct already generated */
600 362 memcpy(v2_session_response + 8, ses->server->cryptKey, 8);
601 server->ntlmssp.hmacmd5 = crypto_alloc_shash("hmac(md5)", 0, 0); 363 hmac_md5_init_limK_to_64(ses->server->ntlmv2_hash, 16, &context);
602 if (!server->ntlmssp.hmacmd5 ||
603 IS_ERR(server->ntlmssp.hmacmd5)) {
604 cERROR(1, "could not allocate crypto hmacmd5\n");
605 return 1;
606 }
607
608 server->ntlmssp.md5 = crypto_alloc_shash("md5", 0, 0);
609 if (!server->ntlmssp.md5 || IS_ERR(server->ntlmssp.md5)) {
610 cERROR(1, "could not allocate crypto md5\n");
611 rc = 1;
612 goto cifs_crypto_shash_allocate_ret1;
613 }
614
615 size = sizeof(struct shash_desc) +
616 crypto_shash_descsize(server->ntlmssp.hmacmd5);
617 server->ntlmssp.sdeschmacmd5 = kmalloc(size, GFP_KERNEL);
618 if (!server->ntlmssp.sdeschmacmd5) {
619 cERROR(1, "cifs_crypto_shash_allocate: can't alloc hmacmd5\n");
620 rc = -ENOMEM;
621 goto cifs_crypto_shash_allocate_ret2;
622 }
623 server->ntlmssp.sdeschmacmd5->shash.tfm = server->ntlmssp.hmacmd5;
624 server->ntlmssp.sdeschmacmd5->shash.flags = 0x0;
625 364
365 hmac_md5_update(v2_session_response+8,
366 sizeof(struct ntlmv2_resp) - 8, &context);
626 367
627 size = sizeof(struct shash_desc) + 368 hmac_md5_final(v2_session_response, &context);
628 crypto_shash_descsize(server->ntlmssp.md5); 369/* cifs_dump_mem("v2_sess_rsp: ", v2_session_response, 32); */
629 server->ntlmssp.sdescmd5 = kmalloc(size, GFP_KERNEL);
630 if (!server->ntlmssp.sdescmd5) {
631 cERROR(1, "cifs_crypto_shash_allocate: can't alloc md5\n");
632 rc = -ENOMEM;
633 goto cifs_crypto_shash_allocate_ret3;
634 }
635 server->ntlmssp.sdescmd5->shash.tfm = server->ntlmssp.md5;
636 server->ntlmssp.sdescmd5->shash.flags = 0x0;
637
638 return 0;
639
640cifs_crypto_shash_allocate_ret3:
641 kfree(server->ntlmssp.sdeschmacmd5);
642
643cifs_crypto_shash_allocate_ret2:
644 crypto_free_shash(server->ntlmssp.md5);
645
646cifs_crypto_shash_allocate_ret1:
647 crypto_free_shash(server->ntlmssp.hmacmd5);
648
649 return rc;
650} 370}
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index c9d0cfc086eb..0cdfb8c32ac6 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -25,9 +25,6 @@
25#include <linux/workqueue.h> 25#include <linux/workqueue.h>
26#include "cifs_fs_sb.h" 26#include "cifs_fs_sb.h"
27#include "cifsacl.h" 27#include "cifsacl.h"
28#include <crypto/internal/hash.h>
29#include <linux/scatterlist.h>
30
31/* 28/*
32 * The sizes of various internal tables and strings 29 * The sizes of various internal tables and strings
33 */ 30 */
@@ -100,7 +97,7 @@ enum protocolEnum {
100 /* Netbios frames protocol not supported at this time */ 97 /* Netbios frames protocol not supported at this time */
101}; 98};
102 99
103struct session_key { 100struct mac_key {
104 unsigned int len; 101 unsigned int len;
105 union { 102 union {
106 char ntlm[CIFS_SESS_KEY_SIZE + 16]; 103 char ntlm[CIFS_SESS_KEY_SIZE + 16];
@@ -123,21 +120,6 @@ struct cifs_cred {
123 struct cifs_ace *aces; 120 struct cifs_ace *aces;
124}; 121};
125 122
126struct sdesc {
127 struct shash_desc shash;
128 char ctx[];
129};
130
131struct ntlmssp_auth {
132 __u32 client_flags;
133 __u32 server_flags;
134 unsigned char ciphertext[CIFS_CPHTXT_SIZE];
135 struct crypto_shash *hmacmd5;
136 struct crypto_shash *md5;
137 struct sdesc *sdeschmacmd5;
138 struct sdesc *sdescmd5;
139};
140
141/* 123/*
142 ***************************************************************** 124 *****************************************************************
143 * Except the CIFS PDUs themselves all the 125 * Except the CIFS PDUs themselves all the
@@ -200,14 +182,11 @@ struct TCP_Server_Info {
200 /* 16th byte of RFC1001 workstation name is always null */ 182 /* 16th byte of RFC1001 workstation name is always null */
201 char workstation_RFC1001_name[RFC1001_NAME_LEN_WITH_NULL]; 183 char workstation_RFC1001_name[RFC1001_NAME_LEN_WITH_NULL];
202 __u32 sequence_number; /* needed for CIFS PDU signature */ 184 __u32 sequence_number; /* needed for CIFS PDU signature */
203 struct session_key session_key; 185 struct mac_key mac_signing_key;
204 char ntlmv2_hash[16]; 186 char ntlmv2_hash[16];
205 unsigned long lstrp; /* when we got last response from this server */ 187 unsigned long lstrp; /* when we got last response from this server */
206 u16 dialect; /* dialect index that server chose */ 188 u16 dialect; /* dialect index that server chose */
207 /* extended security flavors that server supports */ 189 /* extended security flavors that server supports */
208 unsigned int tilen; /* length of the target info blob */
209 unsigned char *tiblob; /* target info blob in challenge response */
210 struct ntlmssp_auth ntlmssp; /* various keys, ciphers, flags */
211 bool sec_kerberos; /* supports plain Kerberos */ 190 bool sec_kerberos; /* supports plain Kerberos */
212 bool sec_mskerberos; /* supports legacy MS Kerberos */ 191 bool sec_mskerberos; /* supports legacy MS Kerberos */
213 bool sec_kerberosu2u; /* supports U2U Kerberos */ 192 bool sec_kerberosu2u; /* supports U2U Kerberos */
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index 320e0fd0ba7b..14d036d8db11 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -134,12 +134,6 @@
134 * Size of the session key (crypto key encrypted with the password 134 * Size of the session key (crypto key encrypted with the password
135 */ 135 */
136#define CIFS_SESS_KEY_SIZE (24) 136#define CIFS_SESS_KEY_SIZE (24)
137#define CIFS_CLIENT_CHALLENGE_SIZE (8)
138#define CIFS_SERVER_CHALLENGE_SIZE (8)
139#define CIFS_HMAC_MD5_HASH_SIZE (16)
140#define CIFS_CPHTXT_SIZE (16)
141#define CIFS_NTLMV2_SESSKEY_SIZE (16)
142#define CIFS_NTHASH_SIZE (16)
143 137
144/* 138/*
145 * Maximum user name length 139 * Maximum user name length
@@ -669,6 +663,7 @@ struct ntlmv2_resp {
669 __le64 time; 663 __le64 time;
670 __u64 client_chal; /* random */ 664 __u64 client_chal; /* random */
671 __u32 reserved2; 665 __u32 reserved2;
666 struct ntlmssp2_name names[2];
672 /* array of name entries could follow ending in minimum 4 byte struct */ 667 /* array of name entries could follow ending in minimum 4 byte struct */
673} __attribute__((packed)); 668} __attribute__((packed));
674 669
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 1378d9133844..1d60c655e3e0 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -87,8 +87,9 @@ extern unsigned int smbCalcSize_LE(struct smb_hdr *ptr);
87extern int decode_negTokenInit(unsigned char *security_blob, int length, 87extern int decode_negTokenInit(unsigned char *security_blob, int length,
88 struct TCP_Server_Info *server); 88 struct TCP_Server_Info *server);
89extern int cifs_convert_address(struct sockaddr *dst, const char *src, int len); 89extern int cifs_convert_address(struct sockaddr *dst, const char *src, int len);
90extern int cifs_set_port(struct sockaddr *addr, const unsigned short int port);
90extern int cifs_fill_sockaddr(struct sockaddr *dst, const char *src, int len, 91extern int cifs_fill_sockaddr(struct sockaddr *dst, const char *src, int len,
91 unsigned short int port); 92 const unsigned short int port);
92extern int map_smb_to_linux_error(struct smb_hdr *smb, int logErr); 93extern int map_smb_to_linux_error(struct smb_hdr *smb, int logErr);
93extern void header_assemble(struct smb_hdr *, char /* command */ , 94extern void header_assemble(struct smb_hdr *, char /* command */ ,
94 const struct cifsTconInfo *, int /* length of 95 const struct cifsTconInfo *, int /* length of
@@ -361,15 +362,13 @@ extern int cifs_sign_smb(struct smb_hdr *, struct TCP_Server_Info *, __u32 *);
361extern int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *, 362extern int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *,
362 __u32 *); 363 __u32 *);
363extern int cifs_verify_signature(struct smb_hdr *, 364extern int cifs_verify_signature(struct smb_hdr *,
364 struct TCP_Server_Info *server, 365 const struct mac_key *mac_key,
365 __u32 expected_sequence_number); 366 __u32 expected_sequence_number);
366extern int cifs_calculate_session_key(struct session_key *key, const char *rn, 367extern int cifs_calculate_mac_key(struct mac_key *key, const char *rn,
367 const char *pass); 368 const char *pass);
368extern int setup_ntlmv2_rsp(struct cifsSesInfo *, char *, 369extern void CalcNTLMv2_response(const struct cifsSesInfo *, char *);
370extern void setup_ntlmv2_rsp(struct cifsSesInfo *, char *,
369 const struct nls_table *); 371 const struct nls_table *);
370extern int cifs_crypto_shash_allocate(struct TCP_Server_Info *);
371extern void cifs_crypto_shash_release(struct TCP_Server_Info *);
372extern int calc_seckey(struct TCP_Server_Info *);
373#ifdef CONFIG_CIFS_WEAK_PW_HASH 372#ifdef CONFIG_CIFS_WEAK_PW_HASH
374extern void calc_lanman_hash(const char *password, const char *cryptkey, 373extern void calc_lanman_hash(const char *password, const char *cryptkey,
375 bool encrypt, char *lnm_session_key); 374 bool encrypt, char *lnm_session_key);
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 4bda920d1f75..7e83b356cc9e 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -232,7 +232,7 @@ static int
232small_smb_init(int smb_command, int wct, struct cifsTconInfo *tcon, 232small_smb_init(int smb_command, int wct, struct cifsTconInfo *tcon,
233 void **request_buf) 233 void **request_buf)
234{ 234{
235 int rc = 0; 235 int rc;
236 236
237 rc = cifs_reconnect_tcon(tcon, smb_command); 237 rc = cifs_reconnect_tcon(tcon, smb_command);
238 if (rc) 238 if (rc)
@@ -250,7 +250,7 @@ small_smb_init(int smb_command, int wct, struct cifsTconInfo *tcon,
250 if (tcon != NULL) 250 if (tcon != NULL)
251 cifs_stats_inc(&tcon->num_smbs_sent); 251 cifs_stats_inc(&tcon->num_smbs_sent);
252 252
253 return rc; 253 return 0;
254} 254}
255 255
256int 256int
@@ -281,16 +281,9 @@ small_smb_init_no_tc(const int smb_command, const int wct,
281 281
282/* If the return code is zero, this function must fill in request_buf pointer */ 282/* If the return code is zero, this function must fill in request_buf pointer */
283static int 283static int
284smb_init(int smb_command, int wct, struct cifsTconInfo *tcon, 284__smb_init(int smb_command, int wct, struct cifsTconInfo *tcon,
285 void **request_buf /* returned */ , 285 void **request_buf, void **response_buf)
286 void **response_buf /* returned */ )
287{ 286{
288 int rc = 0;
289
290 rc = cifs_reconnect_tcon(tcon, smb_command);
291 if (rc)
292 return rc;
293
294 *request_buf = cifs_buf_get(); 287 *request_buf = cifs_buf_get();
295 if (*request_buf == NULL) { 288 if (*request_buf == NULL) {
296 /* BB should we add a retry in here if not a writepage? */ 289 /* BB should we add a retry in here if not a writepage? */
@@ -309,7 +302,31 @@ smb_init(int smb_command, int wct, struct cifsTconInfo *tcon,
309 if (tcon != NULL) 302 if (tcon != NULL)
310 cifs_stats_inc(&tcon->num_smbs_sent); 303 cifs_stats_inc(&tcon->num_smbs_sent);
311 304
312 return rc; 305 return 0;
306}
307
308/* If the return code is zero, this function must fill in request_buf pointer */
309static int
310smb_init(int smb_command, int wct, struct cifsTconInfo *tcon,
311 void **request_buf, void **response_buf)
312{
313 int rc;
314
315 rc = cifs_reconnect_tcon(tcon, smb_command);
316 if (rc)
317 return rc;
318
319 return __smb_init(smb_command, wct, tcon, request_buf, response_buf);
320}
321
322static int
323smb_init_no_reconnect(int smb_command, int wct, struct cifsTconInfo *tcon,
324 void **request_buf, void **response_buf)
325{
326 if (tcon->ses->need_reconnect || tcon->need_reconnect)
327 return -EHOSTDOWN;
328
329 return __smb_init(smb_command, wct, tcon, request_buf, response_buf);
313} 330}
314 331
315static int validate_t2(struct smb_t2_rsp *pSMB) 332static int validate_t2(struct smb_t2_rsp *pSMB)
@@ -604,14 +621,11 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
604 else 621 else
605 rc = -EINVAL; 622 rc = -EINVAL;
606 623
607 if (server->secType == Kerberos) { 624 if (server->sec_kerberos || server->sec_mskerberos)
608 if (!server->sec_kerberos && 625 server->secType = Kerberos;
609 !server->sec_mskerberos) 626 else if (server->sec_ntlmssp)
610 rc = -EOPNOTSUPP; 627 server->secType = RawNTLMSSP;
611 } else if (server->secType == RawNTLMSSP) { 628 else
612 if (!server->sec_ntlmssp)
613 rc = -EOPNOTSUPP;
614 } else
615 rc = -EOPNOTSUPP; 629 rc = -EOPNOTSUPP;
616 } 630 }
617 } else 631 } else
@@ -4537,8 +4551,8 @@ CIFSSMBQFSUnixInfo(const int xid, struct cifsTconInfo *tcon)
4537 4551
4538 cFYI(1, "In QFSUnixInfo"); 4552 cFYI(1, "In QFSUnixInfo");
4539QFSUnixRetry: 4553QFSUnixRetry:
4540 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, 4554 rc = smb_init_no_reconnect(SMB_COM_TRANSACTION2, 15, tcon,
4541 (void **) &pSMBr); 4555 (void **) &pSMB, (void **) &pSMBr);
4542 if (rc) 4556 if (rc)
4543 return rc; 4557 return rc;
4544 4558
@@ -4607,8 +4621,8 @@ CIFSSMBSetFSUnixInfo(const int xid, struct cifsTconInfo *tcon, __u64 cap)
4607 cFYI(1, "In SETFSUnixInfo"); 4621 cFYI(1, "In SETFSUnixInfo");
4608SETFSUnixRetry: 4622SETFSUnixRetry:
4609 /* BB switch to small buf init to save memory */ 4623 /* BB switch to small buf init to save memory */
4610 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, 4624 rc = smb_init_no_reconnect(SMB_COM_TRANSACTION2, 15, tcon,
4611 (void **) &pSMBr); 4625 (void **) &pSMB, (void **) &pSMBr);
4612 if (rc) 4626 if (rc)
4613 return rc; 4627 return rc;
4614 4628
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index ec0ea4a43bdb..88c84a38bccb 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -400,7 +400,9 @@ incomplete_rcv:
400 cFYI(1, "call to reconnect done"); 400 cFYI(1, "call to reconnect done");
401 csocket = server->ssocket; 401 csocket = server->ssocket;
402 continue; 402 continue;
403 } else if ((length == -ERESTARTSYS) || (length == -EAGAIN)) { 403 } else if (length == -ERESTARTSYS ||
404 length == -EAGAIN ||
405 length == -EINTR) {
404 msleep(1); /* minimum sleep to prevent looping 406 msleep(1); /* minimum sleep to prevent looping
405 allowing socket to clear and app threads to set 407 allowing socket to clear and app threads to set
406 tcpStatus CifsNeedReconnect if server hung */ 408 tcpStatus CifsNeedReconnect if server hung */
@@ -414,18 +416,6 @@ incomplete_rcv:
414 } else 416 } else
415 continue; 417 continue;
416 } else if (length <= 0) { 418 } else if (length <= 0) {
417 if (server->tcpStatus == CifsNew) {
418 cFYI(1, "tcp session abend after SMBnegprot");
419 /* some servers kill the TCP session rather than
420 returning an SMB negprot error, in which
421 case reconnecting here is not going to help,
422 and so simply return error to mount */
423 break;
424 }
425 if (!try_to_freeze() && (length == -EINTR)) {
426 cFYI(1, "cifsd thread killed");
427 break;
428 }
429 cFYI(1, "Reconnect after unexpected peek error %d", 419 cFYI(1, "Reconnect after unexpected peek error %d",
430 length); 420 length);
431 cifs_reconnect(server); 421 cifs_reconnect(server);
@@ -466,27 +456,19 @@ incomplete_rcv:
466 an error on SMB negprot response */ 456 an error on SMB negprot response */
467 cFYI(1, "Negative RFC1002 Session Response Error 0x%x)", 457 cFYI(1, "Negative RFC1002 Session Response Error 0x%x)",
468 pdu_length); 458 pdu_length);
469 if (server->tcpStatus == CifsNew) { 459 /* give server a second to clean up */
470 /* if nack on negprot (rather than 460 msleep(1000);
471 ret of smb negprot error) reconnecting 461 /* always try 445 first on reconnect since we get NACK
472 not going to help, ret error to mount */ 462 * on some if we ever connected to port 139 (the NACK
473 break; 463 * is since we do not begin with RFC1001 session
474 } else { 464 * initialize frame)
475 /* give server a second to 465 */
476 clean up before reconnect attempt */ 466 cifs_set_port((struct sockaddr *)
477 msleep(1000); 467 &server->addr.sockAddr, CIFS_PORT);
478 /* always try 445 first on reconnect 468 cifs_reconnect(server);
479 since we get NACK on some if we ever 469 csocket = server->ssocket;
480 connected to port 139 (the NACK is 470 wake_up(&server->response_q);
481 since we do not begin with RFC1001 471 continue;
482 session initialize frame) */
483 server->addr.sockAddr.sin_port =
484 htons(CIFS_PORT);
485 cifs_reconnect(server);
486 csocket = server->ssocket;
487 wake_up(&server->response_q);
488 continue;
489 }
490 } else if (temp != (char) 0) { 472 } else if (temp != (char) 0) {
491 cERROR(1, "Unknown RFC 1002 frame"); 473 cERROR(1, "Unknown RFC 1002 frame");
492 cifs_dump_mem(" Received Data: ", (char *)smb_buffer, 474 cifs_dump_mem(" Received Data: ", (char *)smb_buffer,
@@ -522,8 +504,7 @@ incomplete_rcv:
522 total_read += length) { 504 total_read += length) {
523 length = kernel_recvmsg(csocket, &smb_msg, &iov, 1, 505 length = kernel_recvmsg(csocket, &smb_msg, &iov, 1,
524 pdu_length - total_read, 0); 506 pdu_length - total_read, 0);
525 if ((server->tcpStatus == CifsExiting) || 507 if (server->tcpStatus == CifsExiting) {
526 (length == -EINTR)) {
527 /* then will exit */ 508 /* then will exit */
528 reconnect = 2; 509 reconnect = 2;
529 break; 510 break;
@@ -534,8 +515,9 @@ incomplete_rcv:
534 /* Now we will reread sock */ 515 /* Now we will reread sock */
535 reconnect = 1; 516 reconnect = 1;
536 break; 517 break;
537 } else if ((length == -ERESTARTSYS) || 518 } else if (length == -ERESTARTSYS ||
538 (length == -EAGAIN)) { 519 length == -EAGAIN ||
520 length == -EINTR) {
539 msleep(1); /* minimum sleep to prevent looping, 521 msleep(1); /* minimum sleep to prevent looping,
540 allowing socket to clear and app 522 allowing socket to clear and app
541 threads to set tcpStatus 523 threads to set tcpStatus
@@ -1708,7 +1690,6 @@ cifs_put_smb_ses(struct cifsSesInfo *ses)
1708 CIFSSMBLogoff(xid, ses); 1690 CIFSSMBLogoff(xid, ses);
1709 _FreeXid(xid); 1691 _FreeXid(xid);
1710 } 1692 }
1711 cifs_crypto_shash_release(server);
1712 sesInfoFree(ses); 1693 sesInfoFree(ses);
1713 cifs_put_tcp_session(server); 1694 cifs_put_tcp_session(server);
1714} 1695}
@@ -1725,9 +1706,6 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info)
1725 if (ses) { 1706 if (ses) {
1726 cFYI(1, "Existing smb sess found (status=%d)", ses->status); 1707 cFYI(1, "Existing smb sess found (status=%d)", ses->status);
1727 1708
1728 /* existing SMB ses has a server reference already */
1729 cifs_put_tcp_session(server);
1730
1731 mutex_lock(&ses->session_mutex); 1709 mutex_lock(&ses->session_mutex);
1732 rc = cifs_negotiate_protocol(xid, ses); 1710 rc = cifs_negotiate_protocol(xid, ses);
1733 if (rc) { 1711 if (rc) {
@@ -1750,6 +1728,9 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info)
1750 } 1728 }
1751 } 1729 }
1752 mutex_unlock(&ses->session_mutex); 1730 mutex_unlock(&ses->session_mutex);
1731
1732 /* existing SMB ses has a server reference already */
1733 cifs_put_tcp_session(server);
1753 FreeXid(xid); 1734 FreeXid(xid);
1754 return ses; 1735 return ses;
1755 } 1736 }
@@ -1788,23 +1769,13 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info)
1788 ses->linux_uid = volume_info->linux_uid; 1769 ses->linux_uid = volume_info->linux_uid;
1789 ses->overrideSecFlg = volume_info->secFlg; 1770 ses->overrideSecFlg = volume_info->secFlg;
1790 1771
1791 rc = cifs_crypto_shash_allocate(server);
1792 if (rc) {
1793 cERROR(1, "could not setup hash structures rc %d", rc);
1794 goto get_ses_fail;
1795 }
1796 server->tilen = 0;
1797 server->tiblob = NULL;
1798
1799 mutex_lock(&ses->session_mutex); 1772 mutex_lock(&ses->session_mutex);
1800 rc = cifs_negotiate_protocol(xid, ses); 1773 rc = cifs_negotiate_protocol(xid, ses);
1801 if (!rc) 1774 if (!rc)
1802 rc = cifs_setup_session(xid, ses, volume_info->local_nls); 1775 rc = cifs_setup_session(xid, ses, volume_info->local_nls);
1803 mutex_unlock(&ses->session_mutex); 1776 mutex_unlock(&ses->session_mutex);
1804 if (rc) { 1777 if (rc)
1805 cifs_crypto_shash_release(ses->server);
1806 goto get_ses_fail; 1778 goto get_ses_fail;
1807 }
1808 1779
1809 /* success, put it on the list */ 1780 /* success, put it on the list */
1810 write_lock(&cifs_tcp_ses_lock); 1781 write_lock(&cifs_tcp_ses_lock);
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 86a164f08a74..53cce8cc2224 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -801,6 +801,8 @@ retry_iget5_locked:
801 inode->i_flags |= S_NOATIME | S_NOCMTIME; 801 inode->i_flags |= S_NOATIME | S_NOCMTIME;
802 if (inode->i_state & I_NEW) { 802 if (inode->i_state & I_NEW) {
803 inode->i_ino = hash; 803 inode->i_ino = hash;
804 if (S_ISREG(inode->i_mode))
805 inode->i_data.backing_dev_info = sb->s_bdi;
804#ifdef CONFIG_CIFS_FSCACHE 806#ifdef CONFIG_CIFS_FSCACHE
805 /* initialize per-inode cache cookie pointer */ 807 /* initialize per-inode cache cookie pointer */
806 CIFS_I(inode)->fscache = NULL; 808 CIFS_I(inode)->fscache = NULL;
@@ -1462,29 +1464,18 @@ int cifs_rename(struct inode *source_dir, struct dentry *source_dentry,
1462{ 1464{
1463 char *fromName = NULL; 1465 char *fromName = NULL;
1464 char *toName = NULL; 1466 char *toName = NULL;
1465 struct cifs_sb_info *cifs_sb_source; 1467 struct cifs_sb_info *cifs_sb;
1466 struct cifs_sb_info *cifs_sb_target;
1467 struct cifsTconInfo *tcon; 1468 struct cifsTconInfo *tcon;
1468 FILE_UNIX_BASIC_INFO *info_buf_source = NULL; 1469 FILE_UNIX_BASIC_INFO *info_buf_source = NULL;
1469 FILE_UNIX_BASIC_INFO *info_buf_target; 1470 FILE_UNIX_BASIC_INFO *info_buf_target;
1470 int xid, rc, tmprc; 1471 int xid, rc, tmprc;
1471 1472
1472 cifs_sb_target = CIFS_SB(target_dir->i_sb); 1473 cifs_sb = CIFS_SB(source_dir->i_sb);
1473 cifs_sb_source = CIFS_SB(source_dir->i_sb); 1474 tcon = cifs_sb->tcon;
1474 tcon = cifs_sb_source->tcon;
1475 1475
1476 xid = GetXid(); 1476 xid = GetXid();
1477 1477
1478 /* 1478 /*
1479 * BB: this might be allowed if same server, but different share.
1480 * Consider adding support for this
1481 */
1482 if (tcon != cifs_sb_target->tcon) {
1483 rc = -EXDEV;
1484 goto cifs_rename_exit;
1485 }
1486
1487 /*
1488 * we already have the rename sem so we do not need to 1479 * we already have the rename sem so we do not need to
1489 * grab it again here to protect the path integrity 1480 * grab it again here to protect the path integrity
1490 */ 1481 */
@@ -1519,17 +1510,16 @@ int cifs_rename(struct inode *source_dir, struct dentry *source_dentry,
1519 info_buf_target = info_buf_source + 1; 1510 info_buf_target = info_buf_source + 1;
1520 tmprc = CIFSSMBUnixQPathInfo(xid, tcon, fromName, 1511 tmprc = CIFSSMBUnixQPathInfo(xid, tcon, fromName,
1521 info_buf_source, 1512 info_buf_source,
1522 cifs_sb_source->local_nls, 1513 cifs_sb->local_nls,
1523 cifs_sb_source->mnt_cifs_flags & 1514 cifs_sb->mnt_cifs_flags &
1524 CIFS_MOUNT_MAP_SPECIAL_CHR); 1515 CIFS_MOUNT_MAP_SPECIAL_CHR);
1525 if (tmprc != 0) 1516 if (tmprc != 0)
1526 goto unlink_target; 1517 goto unlink_target;
1527 1518
1528 tmprc = CIFSSMBUnixQPathInfo(xid, tcon, 1519 tmprc = CIFSSMBUnixQPathInfo(xid, tcon, toName,
1529 toName, info_buf_target, 1520 info_buf_target,
1530 cifs_sb_target->local_nls, 1521 cifs_sb->local_nls,
1531 /* remap based on source sb */ 1522 cifs_sb->mnt_cifs_flags &
1532 cifs_sb_source->mnt_cifs_flags &
1533 CIFS_MOUNT_MAP_SPECIAL_CHR); 1523 CIFS_MOUNT_MAP_SPECIAL_CHR);
1534 1524
1535 if (tmprc == 0 && (info_buf_source->UniqueId == 1525 if (tmprc == 0 && (info_buf_source->UniqueId ==
diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c
index f97851119e6c..9aad47a2d62f 100644
--- a/fs/cifs/netmisc.c
+++ b/fs/cifs/netmisc.c
@@ -206,26 +206,30 @@ cifs_convert_address(struct sockaddr *dst, const char *src, int len)
206} 206}
207 207
208int 208int
209cifs_fill_sockaddr(struct sockaddr *dst, const char *src, int len, 209cifs_set_port(struct sockaddr *addr, const unsigned short int port)
210 const unsigned short int port)
211{ 210{
212 if (!cifs_convert_address(dst, src, len)) 211 switch (addr->sa_family) {
213 return 0;
214
215 switch (dst->sa_family) {
216 case AF_INET: 212 case AF_INET:
217 ((struct sockaddr_in *)dst)->sin_port = htons(port); 213 ((struct sockaddr_in *)addr)->sin_port = htons(port);
218 break; 214 break;
219 case AF_INET6: 215 case AF_INET6:
220 ((struct sockaddr_in6 *)dst)->sin6_port = htons(port); 216 ((struct sockaddr_in6 *)addr)->sin6_port = htons(port);
221 break; 217 break;
222 default: 218 default:
223 return 0; 219 return 0;
224 } 220 }
225
226 return 1; 221 return 1;
227} 222}
228 223
224int
225cifs_fill_sockaddr(struct sockaddr *dst, const char *src, int len,
226 const unsigned short int port)
227{
228 if (!cifs_convert_address(dst, src, len))
229 return 0;
230 return cifs_set_port(dst, port);
231}
232
229/***************************************************************************** 233/*****************************************************************************
230convert a NT status code to a dos class/code 234convert a NT status code to a dos class/code
231 *****************************************************************************/ 235 *****************************************************************************/
diff --git a/fs/cifs/ntlmssp.h b/fs/cifs/ntlmssp.h
index 1db0f0746a5b..49c9a4e75319 100644
--- a/fs/cifs/ntlmssp.h
+++ b/fs/cifs/ntlmssp.h
@@ -61,19 +61,6 @@
61#define NTLMSSP_NEGOTIATE_KEY_XCH 0x40000000 61#define NTLMSSP_NEGOTIATE_KEY_XCH 0x40000000
62#define NTLMSSP_NEGOTIATE_56 0x80000000 62#define NTLMSSP_NEGOTIATE_56 0x80000000
63 63
64/* Define AV Pair Field IDs */
65#define NTLMSSP_AV_EOL 0
66#define NTLMSSP_AV_NB_COMPUTER_NAME 1
67#define NTLMSSP_AV_NB_DOMAIN_NAME 2
68#define NTLMSSP_AV_DNS_COMPUTER_NAME 3
69#define NTLMSSP_AV_DNS_DOMAIN_NAME 4
70#define NTLMSSP_AV_DNS_TREE_NAME 5
71#define NTLMSSP_AV_FLAGS 6
72#define NTLMSSP_AV_TIMESTAMP 7
73#define NTLMSSP_AV_RESTRICTION 8
74#define NTLMSSP_AV_TARGET_NAME 9
75#define NTLMSSP_AV_CHANNEL_BINDINGS 10
76
77/* Although typedefs are not commonly used for structure definitions */ 64/* Although typedefs are not commonly used for structure definitions */
78/* in the Linux kernel, in this particular case they are useful */ 65/* in the Linux kernel, in this particular case they are useful */
79/* to more closely match the standards document for NTLMSSP from */ 66/* to more closely match the standards document for NTLMSSP from */
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 795095f4eac6..0a57cb7db5dd 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -383,9 +383,6 @@ static int decode_ascii_ssetup(char **pbcc_area, int bleft,
383static int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len, 383static int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len,
384 struct cifsSesInfo *ses) 384 struct cifsSesInfo *ses)
385{ 385{
386 unsigned int tioffset; /* challeng message target info area */
387 unsigned int tilen; /* challeng message target info area length */
388
389 CHALLENGE_MESSAGE *pblob = (CHALLENGE_MESSAGE *)bcc_ptr; 386 CHALLENGE_MESSAGE *pblob = (CHALLENGE_MESSAGE *)bcc_ptr;
390 387
391 if (blob_len < sizeof(CHALLENGE_MESSAGE)) { 388 if (blob_len < sizeof(CHALLENGE_MESSAGE)) {
@@ -408,20 +405,6 @@ static int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len,
408 /* BB spec says that if AvId field of MsvAvTimestamp is populated then 405 /* BB spec says that if AvId field of MsvAvTimestamp is populated then
409 we must set the MIC field of the AUTHENTICATE_MESSAGE */ 406 we must set the MIC field of the AUTHENTICATE_MESSAGE */
410 407
411 ses->server->ntlmssp.server_flags = le32_to_cpu(pblob->NegotiateFlags);
412
413 tioffset = cpu_to_le16(pblob->TargetInfoArray.BufferOffset);
414 tilen = cpu_to_le16(pblob->TargetInfoArray.Length);
415 ses->server->tilen = tilen;
416 if (tilen) {
417 ses->server->tiblob = kmalloc(tilen, GFP_KERNEL);
418 if (!ses->server->tiblob) {
419 cERROR(1, "Challenge target info allocation failure");
420 return -ENOMEM;
421 }
422 memcpy(ses->server->tiblob, bcc_ptr + tioffset, tilen);
423 }
424
425 return 0; 408 return 0;
426} 409}
427 410
@@ -442,13 +425,12 @@ static void build_ntlmssp_negotiate_blob(unsigned char *pbuffer,
442 /* BB is NTLMV2 session security format easier to use here? */ 425 /* BB is NTLMV2 session security format easier to use here? */
443 flags = NTLMSSP_NEGOTIATE_56 | NTLMSSP_REQUEST_TARGET | 426 flags = NTLMSSP_NEGOTIATE_56 | NTLMSSP_REQUEST_TARGET |
444 NTLMSSP_NEGOTIATE_128 | NTLMSSP_NEGOTIATE_UNICODE | 427 NTLMSSP_NEGOTIATE_128 | NTLMSSP_NEGOTIATE_UNICODE |
445 NTLMSSP_NEGOTIATE_NTLM; 428 NTLMSSP_NEGOTIATE_NT_ONLY | NTLMSSP_NEGOTIATE_NTLM;
446 if (ses->server->secMode & 429 if (ses->server->secMode &
447 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) { 430 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
448 flags |= NTLMSSP_NEGOTIATE_SIGN | 431 flags |= NTLMSSP_NEGOTIATE_SIGN;
449 NTLMSSP_NEGOTIATE_KEY_XCH | 432 if (ses->server->secMode & SECMODE_SIGN_REQUIRED)
450 NTLMSSP_NEGOTIATE_EXTENDED_SEC; 433 flags |= NTLMSSP_NEGOTIATE_ALWAYS_SIGN;
451 }
452 434
453 sec_blob->NegotiateFlags |= cpu_to_le32(flags); 435 sec_blob->NegotiateFlags |= cpu_to_le32(flags);
454 436
@@ -469,12 +451,10 @@ static int build_ntlmssp_auth_blob(unsigned char *pbuffer,
469 struct cifsSesInfo *ses, 451 struct cifsSesInfo *ses,
470 const struct nls_table *nls_cp, bool first) 452 const struct nls_table *nls_cp, bool first)
471{ 453{
472 int rc;
473 unsigned int size;
474 AUTHENTICATE_MESSAGE *sec_blob = (AUTHENTICATE_MESSAGE *)pbuffer; 454 AUTHENTICATE_MESSAGE *sec_blob = (AUTHENTICATE_MESSAGE *)pbuffer;
475 __u32 flags; 455 __u32 flags;
476 unsigned char *tmp; 456 unsigned char *tmp;
477 struct ntlmv2_resp ntlmv2_response = {}; 457 char ntlm_session_key[CIFS_SESS_KEY_SIZE];
478 458
479 memcpy(sec_blob->Signature, NTLMSSP_SIGNATURE, 8); 459 memcpy(sec_blob->Signature, NTLMSSP_SIGNATURE, 8);
480 sec_blob->MessageType = NtLmAuthenticate; 460 sec_blob->MessageType = NtLmAuthenticate;
@@ -497,25 +477,19 @@ static int build_ntlmssp_auth_blob(unsigned char *pbuffer,
497 sec_blob->LmChallengeResponse.Length = 0; 477 sec_blob->LmChallengeResponse.Length = 0;
498 sec_blob->LmChallengeResponse.MaximumLength = 0; 478 sec_blob->LmChallengeResponse.MaximumLength = 0;
499 479
500 sec_blob->NtChallengeResponse.BufferOffset = cpu_to_le32(tmp - pbuffer); 480 /* calculate session key, BB what about adding similar ntlmv2 path? */
501 rc = setup_ntlmv2_rsp(ses, (char *)&ntlmv2_response, nls_cp); 481 SMBNTencrypt(ses->password, ses->server->cryptKey, ntlm_session_key);
502 if (rc) { 482 if (first)
503 cERROR(1, "error rc: %d during ntlmssp ntlmv2 setup", rc); 483 cifs_calculate_mac_key(&ses->server->mac_signing_key,
504 goto setup_ntlmv2_ret; 484 ntlm_session_key, ses->password);
505 }
506 size = sizeof(struct ntlmv2_resp);
507 memcpy(tmp, (char *)&ntlmv2_response, size);
508 tmp += size;
509 if (ses->server->tilen > 0) {
510 memcpy(tmp, ses->server->tiblob, ses->server->tilen);
511 tmp += ses->server->tilen;
512 } else
513 ses->server->tilen = 0;
514 485
515 sec_blob->NtChallengeResponse.Length = cpu_to_le16(size + 486 memcpy(tmp, ntlm_session_key, CIFS_SESS_KEY_SIZE);
516 ses->server->tilen); 487 sec_blob->NtChallengeResponse.BufferOffset = cpu_to_le32(tmp - pbuffer);
488 sec_blob->NtChallengeResponse.Length = cpu_to_le16(CIFS_SESS_KEY_SIZE);
517 sec_blob->NtChallengeResponse.MaximumLength = 489 sec_blob->NtChallengeResponse.MaximumLength =
518 cpu_to_le16(size + ses->server->tilen); 490 cpu_to_le16(CIFS_SESS_KEY_SIZE);
491
492 tmp += CIFS_SESS_KEY_SIZE;
519 493
520 if (ses->domainName == NULL) { 494 if (ses->domainName == NULL) {
521 sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - pbuffer); 495 sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - pbuffer);
@@ -527,6 +501,7 @@ static int build_ntlmssp_auth_blob(unsigned char *pbuffer,
527 len = cifs_strtoUCS((__le16 *)tmp, ses->domainName, 501 len = cifs_strtoUCS((__le16 *)tmp, ses->domainName,
528 MAX_USERNAME_SIZE, nls_cp); 502 MAX_USERNAME_SIZE, nls_cp);
529 len *= 2; /* unicode is 2 bytes each */ 503 len *= 2; /* unicode is 2 bytes each */
504 len += 2; /* trailing null */
530 sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - pbuffer); 505 sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - pbuffer);
531 sec_blob->DomainName.Length = cpu_to_le16(len); 506 sec_blob->DomainName.Length = cpu_to_le16(len);
532 sec_blob->DomainName.MaximumLength = cpu_to_le16(len); 507 sec_blob->DomainName.MaximumLength = cpu_to_le16(len);
@@ -543,6 +518,7 @@ static int build_ntlmssp_auth_blob(unsigned char *pbuffer,
543 len = cifs_strtoUCS((__le16 *)tmp, ses->userName, 518 len = cifs_strtoUCS((__le16 *)tmp, ses->userName,
544 MAX_USERNAME_SIZE, nls_cp); 519 MAX_USERNAME_SIZE, nls_cp);
545 len *= 2; /* unicode is 2 bytes each */ 520 len *= 2; /* unicode is 2 bytes each */
521 len += 2; /* trailing null */
546 sec_blob->UserName.BufferOffset = cpu_to_le32(tmp - pbuffer); 522 sec_blob->UserName.BufferOffset = cpu_to_le32(tmp - pbuffer);
547 sec_blob->UserName.Length = cpu_to_le16(len); 523 sec_blob->UserName.Length = cpu_to_le16(len);
548 sec_blob->UserName.MaximumLength = cpu_to_le16(len); 524 sec_blob->UserName.MaximumLength = cpu_to_le16(len);
@@ -554,26 +530,9 @@ static int build_ntlmssp_auth_blob(unsigned char *pbuffer,
554 sec_blob->WorkstationName.MaximumLength = 0; 530 sec_blob->WorkstationName.MaximumLength = 0;
555 tmp += 2; 531 tmp += 2;
556 532
557 if ((ses->server->ntlmssp.server_flags & NTLMSSP_NEGOTIATE_KEY_XCH) && 533 sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - pbuffer);
558 !calc_seckey(ses->server)) { 534 sec_blob->SessionKey.Length = 0;
559 memcpy(tmp, ses->server->ntlmssp.ciphertext, CIFS_CPHTXT_SIZE); 535 sec_blob->SessionKey.MaximumLength = 0;
560 sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - pbuffer);
561 sec_blob->SessionKey.Length = cpu_to_le16(CIFS_CPHTXT_SIZE);
562 sec_blob->SessionKey.MaximumLength =
563 cpu_to_le16(CIFS_CPHTXT_SIZE);
564 tmp += CIFS_CPHTXT_SIZE;
565 } else {
566 sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - pbuffer);
567 sec_blob->SessionKey.Length = 0;
568 sec_blob->SessionKey.MaximumLength = 0;
569 }
570
571 ses->server->sequence_number = 0;
572
573setup_ntlmv2_ret:
574 if (ses->server->tilen > 0)
575 kfree(ses->server->tiblob);
576
577 return tmp - pbuffer; 536 return tmp - pbuffer;
578} 537}
579 538
@@ -587,14 +546,15 @@ static void setup_ntlmssp_neg_req(SESSION_SETUP_ANDX *pSMB,
587 return; 546 return;
588} 547}
589 548
590static int setup_ntlmssp_auth_req(char *ntlmsspblob, 549static int setup_ntlmssp_auth_req(SESSION_SETUP_ANDX *pSMB,
591 struct cifsSesInfo *ses, 550 struct cifsSesInfo *ses,
592 const struct nls_table *nls, bool first_time) 551 const struct nls_table *nls, bool first_time)
593{ 552{
594 int bloblen; 553 int bloblen;
595 554
596 bloblen = build_ntlmssp_auth_blob(ntlmsspblob, ses, nls, 555 bloblen = build_ntlmssp_auth_blob(&pSMB->req.SecurityBlob[0], ses, nls,
597 first_time); 556 first_time);
557 pSMB->req.SecurityBlobLength = cpu_to_le16(bloblen);
598 558
599 return bloblen; 559 return bloblen;
600} 560}
@@ -730,7 +690,7 @@ ssetup_ntlmssp_authenticate:
730 690
731 if (first_time) /* should this be moved into common code 691 if (first_time) /* should this be moved into common code
732 with similar ntlmv2 path? */ 692 with similar ntlmv2 path? */
733 cifs_calculate_session_key(&ses->server->session_key, 693 cifs_calculate_mac_key(&ses->server->mac_signing_key,
734 ntlm_session_key, ses->password); 694 ntlm_session_key, ses->password);
735 /* copy session key */ 695 /* copy session key */
736 696
@@ -769,21 +729,12 @@ ssetup_ntlmssp_authenticate:
769 cpu_to_le16(sizeof(struct ntlmv2_resp)); 729 cpu_to_le16(sizeof(struct ntlmv2_resp));
770 730
771 /* calculate session key */ 731 /* calculate session key */
772 rc = setup_ntlmv2_rsp(ses, v2_sess_key, nls_cp); 732 setup_ntlmv2_rsp(ses, v2_sess_key, nls_cp);
773 if (rc) {
774 kfree(v2_sess_key);
775 goto ssetup_exit;
776 }
777 /* FIXME: calculate MAC key */ 733 /* FIXME: calculate MAC key */
778 memcpy(bcc_ptr, (char *)v2_sess_key, 734 memcpy(bcc_ptr, (char *)v2_sess_key,
779 sizeof(struct ntlmv2_resp)); 735 sizeof(struct ntlmv2_resp));
780 bcc_ptr += sizeof(struct ntlmv2_resp); 736 bcc_ptr += sizeof(struct ntlmv2_resp);
781 kfree(v2_sess_key); 737 kfree(v2_sess_key);
782 if (ses->server->tilen > 0) {
783 memcpy(bcc_ptr, ses->server->tiblob,
784 ses->server->tilen);
785 bcc_ptr += ses->server->tilen;
786 }
787 if (ses->capabilities & CAP_UNICODE) { 738 if (ses->capabilities & CAP_UNICODE) {
788 if (iov[0].iov_len % 2) { 739 if (iov[0].iov_len % 2) {
789 *bcc_ptr = 0; 740 *bcc_ptr = 0;
@@ -814,15 +765,15 @@ ssetup_ntlmssp_authenticate:
814 } 765 }
815 /* bail out if key is too long */ 766 /* bail out if key is too long */
816 if (msg->sesskey_len > 767 if (msg->sesskey_len >
817 sizeof(ses->server->session_key.data.krb5)) { 768 sizeof(ses->server->mac_signing_key.data.krb5)) {
818 cERROR(1, "Kerberos signing key too long (%u bytes)", 769 cERROR(1, "Kerberos signing key too long (%u bytes)",
819 msg->sesskey_len); 770 msg->sesskey_len);
820 rc = -EOVERFLOW; 771 rc = -EOVERFLOW;
821 goto ssetup_exit; 772 goto ssetup_exit;
822 } 773 }
823 if (first_time) { 774 if (first_time) {
824 ses->server->session_key.len = msg->sesskey_len; 775 ses->server->mac_signing_key.len = msg->sesskey_len;
825 memcpy(ses->server->session_key.data.krb5, 776 memcpy(ses->server->mac_signing_key.data.krb5,
826 msg->data, msg->sesskey_len); 777 msg->data, msg->sesskey_len);
827 } 778 }
828 pSMB->req.hdr.Flags2 |= SMBFLG2_EXT_SEC; 779 pSMB->req.hdr.Flags2 |= SMBFLG2_EXT_SEC;
@@ -864,28 +815,12 @@ ssetup_ntlmssp_authenticate:
864 if (phase == NtLmNegotiate) { 815 if (phase == NtLmNegotiate) {
865 setup_ntlmssp_neg_req(pSMB, ses); 816 setup_ntlmssp_neg_req(pSMB, ses);
866 iov[1].iov_len = sizeof(NEGOTIATE_MESSAGE); 817 iov[1].iov_len = sizeof(NEGOTIATE_MESSAGE);
867 iov[1].iov_base = &pSMB->req.SecurityBlob[0];
868 } else if (phase == NtLmAuthenticate) { 818 } else if (phase == NtLmAuthenticate) {
869 int blob_len; 819 int blob_len;
870 char *ntlmsspblob; 820 blob_len = setup_ntlmssp_auth_req(pSMB, ses,
871 821 nls_cp,
872 ntlmsspblob = kmalloc(5 * 822 first_time);
873 sizeof(struct _AUTHENTICATE_MESSAGE),
874 GFP_KERNEL);
875 if (!ntlmsspblob) {
876 cERROR(1, "Can't allocate NTLMSSP");
877 rc = -ENOMEM;
878 goto ssetup_exit;
879 }
880
881 blob_len = setup_ntlmssp_auth_req(ntlmsspblob,
882 ses,
883 nls_cp,
884 first_time);
885 iov[1].iov_len = blob_len; 823 iov[1].iov_len = blob_len;
886 iov[1].iov_base = ntlmsspblob;
887 pSMB->req.SecurityBlobLength =
888 cpu_to_le16(blob_len);
889 /* Make sure that we tell the server that we 824 /* Make sure that we tell the server that we
890 are using the uid that it just gave us back 825 are using the uid that it just gave us back
891 on the response (challenge) */ 826 on the response (challenge) */
@@ -895,6 +830,7 @@ ssetup_ntlmssp_authenticate:
895 rc = -ENOSYS; 830 rc = -ENOSYS;
896 goto ssetup_exit; 831 goto ssetup_exit;
897 } 832 }
833 iov[1].iov_base = &pSMB->req.SecurityBlob[0];
898 /* unicode strings must be word aligned */ 834 /* unicode strings must be word aligned */
899 if ((iov[0].iov_len + iov[1].iov_len) % 2) { 835 if ((iov[0].iov_len + iov[1].iov_len) % 2) {
900 *bcc_ptr = 0; 836 *bcc_ptr = 0;
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index e0588cdf4cc5..82f78c4d6978 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -543,7 +543,7 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses,
543 (ses->server->secMode & (SECMODE_SIGN_REQUIRED | 543 (ses->server->secMode & (SECMODE_SIGN_REQUIRED |
544 SECMODE_SIGN_ENABLED))) { 544 SECMODE_SIGN_ENABLED))) {
545 rc = cifs_verify_signature(midQ->resp_buf, 545 rc = cifs_verify_signature(midQ->resp_buf,
546 ses->server, 546 &ses->server->mac_signing_key,
547 midQ->sequence_number+1); 547 midQ->sequence_number+1);
548 if (rc) { 548 if (rc) {
549 cERROR(1, "Unexpected SMB signature"); 549 cERROR(1, "Unexpected SMB signature");
@@ -731,7 +731,7 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
731 (ses->server->secMode & (SECMODE_SIGN_REQUIRED | 731 (ses->server->secMode & (SECMODE_SIGN_REQUIRED |
732 SECMODE_SIGN_ENABLED))) { 732 SECMODE_SIGN_ENABLED))) {
733 rc = cifs_verify_signature(out_buf, 733 rc = cifs_verify_signature(out_buf,
734 ses->server, 734 &ses->server->mac_signing_key,
735 midQ->sequence_number+1); 735 midQ->sequence_number+1);
736 if (rc) { 736 if (rc) {
737 cERROR(1, "Unexpected SMB signature"); 737 cERROR(1, "Unexpected SMB signature");
@@ -981,7 +981,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon,
981 (ses->server->secMode & (SECMODE_SIGN_REQUIRED | 981 (ses->server->secMode & (SECMODE_SIGN_REQUIRED |
982 SECMODE_SIGN_ENABLED))) { 982 SECMODE_SIGN_ENABLED))) {
983 rc = cifs_verify_signature(out_buf, 983 rc = cifs_verify_signature(out_buf,
984 ses->server, 984 &ses->server->mac_signing_key,
985 midQ->sequence_number+1); 985 midQ->sequence_number+1);
986 if (rc) { 986 if (rc) {
987 cERROR(1, "Unexpected SMB signature"); 987 cERROR(1, "Unexpected SMB signature");
diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c
index de89645777c7..116af7546cf0 100644
--- a/fs/coda/psdev.c
+++ b/fs/coda/psdev.c
@@ -184,8 +184,8 @@ static ssize_t coda_psdev_write(struct file *file, const char __user *buf,
184 } 184 }
185 185
186 /* adjust outsize. is this useful ?? */ 186 /* adjust outsize. is this useful ?? */
187 req->uc_outSize = nbytes; 187 req->uc_outSize = nbytes;
188 req->uc_flags |= REQ_WRITE; 188 req->uc_flags |= CODA_REQ_WRITE;
189 count = nbytes; 189 count = nbytes;
190 190
191 /* Convert filedescriptor into a file handle */ 191 /* Convert filedescriptor into a file handle */
diff --git a/fs/compat.c b/fs/compat.c
index 718c7062aec1..0644a154672b 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1153,7 +1153,7 @@ static ssize_t compat_do_readv_writev(int type, struct file *file,
1153{ 1153{
1154 compat_ssize_t tot_len; 1154 compat_ssize_t tot_len;
1155 struct iovec iovstack[UIO_FASTIOV]; 1155 struct iovec iovstack[UIO_FASTIOV];
1156 struct iovec *iov; 1156 struct iovec *iov = iovstack;
1157 ssize_t ret; 1157 ssize_t ret;
1158 io_fn_t fn; 1158 io_fn_t fn;
1159 iov_fn_t fnv; 1159 iov_fn_t fnv;
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 51f270b479b6..48d74c7391d1 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -634,7 +634,7 @@ static int dio_send_cur_page(struct dio *dio)
634 int ret = 0; 634 int ret = 0;
635 635
636 if (dio->bio) { 636 if (dio->bio) {
637 loff_t cur_offset = dio->block_in_file << dio->blkbits; 637 loff_t cur_offset = dio->cur_page_fs_offset;
638 loff_t bio_next_offset = dio->logical_offset_in_bio + 638 loff_t bio_next_offset = dio->logical_offset_in_bio +
639 dio->bio->bi_size; 639 dio->bio->bi_size;
640 640
@@ -659,7 +659,7 @@ static int dio_send_cur_page(struct dio *dio)
659 * Submit now if the underlying fs is about to perform a 659 * Submit now if the underlying fs is about to perform a
660 * metadata read 660 * metadata read
661 */ 661 */
662 if (dio->boundary) 662 else if (dio->boundary)
663 dio_bio_submit(dio); 663 dio_bio_submit(dio);
664 } 664 }
665 665
diff --git a/fs/exec.c b/fs/exec.c
index 2d9455282744..828dd2461d6b 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -376,6 +376,9 @@ static int count(const char __user * const __user * argv, int max)
376 argv++; 376 argv++;
377 if (i++ >= max) 377 if (i++ >= max)
378 return -E2BIG; 378 return -E2BIG;
379
380 if (fatal_signal_pending(current))
381 return -ERESTARTNOHAND;
379 cond_resched(); 382 cond_resched();
380 } 383 }
381 } 384 }
@@ -419,6 +422,12 @@ static int copy_strings(int argc, const char __user *const __user *argv,
419 while (len > 0) { 422 while (len > 0) {
420 int offset, bytes_to_copy; 423 int offset, bytes_to_copy;
421 424
425 if (fatal_signal_pending(current)) {
426 ret = -ERESTARTNOHAND;
427 goto out;
428 }
429 cond_resched();
430
422 offset = pos % PAGE_SIZE; 431 offset = pos % PAGE_SIZE;
423 if (offset == 0) 432 if (offset == 0)
424 offset = PAGE_SIZE; 433 offset = PAGE_SIZE;
@@ -594,6 +603,11 @@ int setup_arg_pages(struct linux_binprm *bprm,
594#else 603#else
595 stack_top = arch_align_stack(stack_top); 604 stack_top = arch_align_stack(stack_top);
596 stack_top = PAGE_ALIGN(stack_top); 605 stack_top = PAGE_ALIGN(stack_top);
606
607 if (unlikely(stack_top < mmap_min_addr) ||
608 unlikely(vma->vm_end - vma->vm_start >= stack_top - mmap_min_addr))
609 return -ENOMEM;
610
597 stack_shift = vma->vm_end - stack_top; 611 stack_shift = vma->vm_end - stack_top;
598 612
599 bprm->p -= stack_shift; 613 bprm->p -= stack_shift;
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 6769fd0f35b8..f8cc34f542c3 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -769,11 +769,15 @@ EXPORT_SYMBOL(kill_fasync);
769 769
770static int __init fcntl_init(void) 770static int __init fcntl_init(void)
771{ 771{
772 /* please add new bits here to ensure allocation uniqueness */ 772 /*
773 BUILD_BUG_ON(19 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32( 773 * Please add new bits here to ensure allocation uniqueness.
774 * Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY
775 * is defined as O_NONBLOCK on some platforms and not on others.
776 */
777 BUILD_BUG_ON(18 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32(
774 O_RDONLY | O_WRONLY | O_RDWR | 778 O_RDONLY | O_WRONLY | O_RDWR |
775 O_CREAT | O_EXCL | O_NOCTTY | 779 O_CREAT | O_EXCL | O_NOCTTY |
776 O_TRUNC | O_APPEND | O_NONBLOCK | 780 O_TRUNC | O_APPEND | /* O_NONBLOCK | */
777 __O_SYNC | O_DSYNC | FASYNC | 781 __O_SYNC | O_DSYNC | FASYNC |
778 O_DIRECT | O_LARGEFILE | O_DIRECTORY | 782 O_DIRECT | O_LARGEFILE | O_DIRECTORY |
779 O_NOFOLLOW | O_NOATIME | O_CLOEXEC | 783 O_NOFOLLOW | O_NOATIME | O_CLOEXEC |
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 7d9d06ba184b..ab38fef1c9a1 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -52,8 +52,6 @@ struct wb_writeback_work {
52#define CREATE_TRACE_POINTS 52#define CREATE_TRACE_POINTS
53#include <trace/events/writeback.h> 53#include <trace/events/writeback.h>
54 54
55#define inode_to_bdi(inode) ((inode)->i_mapping->backing_dev_info)
56
57/* 55/*
58 * We don't actually have pdflush, but this one is exported though /proc... 56 * We don't actually have pdflush, but this one is exported though /proc...
59 */ 57 */
@@ -71,6 +69,16 @@ int writeback_in_progress(struct backing_dev_info *bdi)
71 return test_bit(BDI_writeback_running, &bdi->state); 69 return test_bit(BDI_writeback_running, &bdi->state);
72} 70}
73 71
72static inline struct backing_dev_info *inode_to_bdi(struct inode *inode)
73{
74 struct super_block *sb = inode->i_sb;
75
76 if (strcmp(sb->s_type->name, "bdev") == 0)
77 return inode->i_mapping->backing_dev_info;
78
79 return sb->s_bdi;
80}
81
74static void bdi_queue_work(struct backing_dev_info *bdi, 82static void bdi_queue_work(struct backing_dev_info *bdi,
75 struct wb_writeback_work *work) 83 struct wb_writeback_work *work)
76{ 84{
@@ -808,7 +816,7 @@ int bdi_writeback_thread(void *data)
808 wb->last_active = jiffies; 816 wb->last_active = jiffies;
809 817
810 set_current_state(TASK_INTERRUPTIBLE); 818 set_current_state(TASK_INTERRUPTIBLE);
811 if (!list_empty(&bdi->work_list)) { 819 if (!list_empty(&bdi->work_list) || kthread_should_stop()) {
812 __set_current_state(TASK_RUNNING); 820 __set_current_state(TASK_RUNNING);
813 continue; 821 continue;
814 } 822 }
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index d367af1514ef..cde755cca564 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -1354,7 +1354,7 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
1354 loff_t file_size; 1354 loff_t file_size;
1355 unsigned int num; 1355 unsigned int num;
1356 unsigned int offset; 1356 unsigned int offset;
1357 size_t total_len; 1357 size_t total_len = 0;
1358 1358
1359 req = fuse_get_req(fc); 1359 req = fuse_get_req(fc);
1360 if (IS_ERR(req)) 1360 if (IS_ERR(req))
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index cde1248a6225..ac750bd31a6f 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -932,7 +932,7 @@ int gfs2_logd(void *data)
932 932
933 do { 933 do {
934 prepare_to_wait(&sdp->sd_logd_waitq, &wait, 934 prepare_to_wait(&sdp->sd_logd_waitq, &wait,
935 TASK_UNINTERRUPTIBLE); 935 TASK_INTERRUPTIBLE);
936 if (!gfs2_ail_flush_reqd(sdp) && 936 if (!gfs2_ail_flush_reqd(sdp) &&
937 !gfs2_jrnl_flush_reqd(sdp) && 937 !gfs2_jrnl_flush_reqd(sdp) &&
938 !kthread_should_stop()) 938 !kthread_should_stop())
diff --git a/fs/minix/namei.c b/fs/minix/namei.c
index e20ee85955d1..f3f3578393a4 100644
--- a/fs/minix/namei.c
+++ b/fs/minix/namei.c
@@ -115,7 +115,7 @@ static int minix_mkdir(struct inode * dir, struct dentry *dentry, int mode)
115 115
116 inode_inc_link_count(dir); 116 inode_inc_link_count(dir);
117 117
118 inode = minix_new_inode(dir, mode, &err); 118 inode = minix_new_inode(dir, S_IFDIR | mode, &err);
119 if (!inode) 119 if (!inode)
120 goto out_dir; 120 goto out_dir;
121 121
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig
index 6c2aad49d731..f7e13db613cb 100644
--- a/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig
@@ -63,6 +63,7 @@ config NFS_V3_ACL
63config NFS_V4 63config NFS_V4
64 bool "NFS client support for NFS version 4" 64 bool "NFS client support for NFS version 4"
65 depends on NFS_FS 65 depends on NFS_FS
66 select SUNRPC_GSS
66 help 67 help
67 This option enables support for version 4 of the NFS protocol 68 This option enables support for version 4 of the NFS protocol
68 (RFC 3530) in the kernel's NFS client. 69 (RFC 3530) in the kernel's NFS client.
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 4e7df2adb212..e7340729af89 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -275,7 +275,7 @@ static int nfs_sockaddr_match_ipaddr6(const struct sockaddr *sa1,
275 sin1->sin6_scope_id != sin2->sin6_scope_id) 275 sin1->sin6_scope_id != sin2->sin6_scope_id)
276 return 0; 276 return 0;
277 277
278 return ipv6_addr_equal(&sin1->sin6_addr, &sin1->sin6_addr); 278 return ipv6_addr_equal(&sin1->sin6_addr, &sin2->sin6_addr);
279} 279}
280#else /* !defined(CONFIG_IPV6) && !defined(CONFIG_IPV6_MODULE) */ 280#else /* !defined(CONFIG_IPV6) && !defined(CONFIG_IPV6_MODULE) */
281static int nfs_sockaddr_match_ipaddr6(const struct sockaddr *sa1, 281static int nfs_sockaddr_match_ipaddr6(const struct sockaddr *sa1,
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index eb51bd6201da..05bf3c0dc751 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -723,10 +723,6 @@ static int do_vfs_lock(struct file *file, struct file_lock *fl)
723 default: 723 default:
724 BUG(); 724 BUG();
725 } 725 }
726 if (res < 0)
727 dprintk(KERN_WARNING "%s: VFS is out of sync with lock manager"
728 " - error %d!\n",
729 __func__, res);
730 return res; 726 return res;
731} 727}
732 728
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index ec3966e4706b..f4cbf0c306c6 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -431,7 +431,15 @@ static int nfs_statfs(struct dentry *dentry, struct kstatfs *buf)
431 goto out_err; 431 goto out_err;
432 432
433 error = server->nfs_client->rpc_ops->statfs(server, fh, &res); 433 error = server->nfs_client->rpc_ops->statfs(server, fh, &res);
434 if (unlikely(error == -ESTALE)) {
435 struct dentry *pd_dentry;
434 436
437 pd_dentry = dget_parent(dentry);
438 if (pd_dentry != NULL) {
439 nfs_zap_caches(pd_dentry->d_inode);
440 dput(pd_dentry);
441 }
442 }
435 nfs_free_fattr(res.fattr); 443 nfs_free_fattr(res.fattr);
436 if (error < 0) 444 if (error < 0)
437 goto out_err; 445 goto out_err;
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig
index 95932f523aef..4264377552e2 100644
--- a/fs/nfsd/Kconfig
+++ b/fs/nfsd/Kconfig
@@ -69,6 +69,7 @@ config NFSD_V4
69 depends on NFSD && PROC_FS && EXPERIMENTAL 69 depends on NFSD && PROC_FS && EXPERIMENTAL
70 select NFSD_V3 70 select NFSD_V3
71 select FS_POSIX_ACL 71 select FS_POSIX_ACL
72 select SUNRPC_GSS
72 help 73 help
73 This option enables support in your system's NFS server for 74 This option enables support in your system's NFS server for
74 version 4 of the NFS protocol (RFC 3530). 75 version 4 of the NFS protocol (RFC 3530).
diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c
index a76e0aa5cd3f..391915093fe1 100644
--- a/fs/ocfs2/acl.c
+++ b/fs/ocfs2/acl.c
@@ -209,7 +209,10 @@ static int ocfs2_acl_set_mode(struct inode *inode, struct buffer_head *di_bh,
209 } 209 }
210 210
211 inode->i_mode = new_mode; 211 inode->i_mode = new_mode;
212 inode->i_ctime = CURRENT_TIME;
212 di->i_mode = cpu_to_le16(inode->i_mode); 213 di->i_mode = cpu_to_le16(inode->i_mode);
214 di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
215 di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
213 216
214 ocfs2_journal_dirty(handle, di_bh); 217 ocfs2_journal_dirty(handle, di_bh);
215 218
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 41d5f1f92d56..52c7557f3e25 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -62,10 +62,51 @@ static unsigned long o2hb_live_node_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)];
62static LIST_HEAD(o2hb_node_events); 62static LIST_HEAD(o2hb_node_events);
63static DECLARE_WAIT_QUEUE_HEAD(o2hb_steady_queue); 63static DECLARE_WAIT_QUEUE_HEAD(o2hb_steady_queue);
64 64
65/*
66 * In global heartbeat, we maintain a series of region bitmaps.
67 * - o2hb_region_bitmap allows us to limit the region number to max region.
68 * - o2hb_live_region_bitmap tracks live regions (seen steady iterations).
69 * - o2hb_quorum_region_bitmap tracks live regions that have seen all nodes
70 * heartbeat on it.
71 * - o2hb_failed_region_bitmap tracks the regions that have seen io timeouts.
72 */
73static unsigned long o2hb_region_bitmap[BITS_TO_LONGS(O2NM_MAX_REGIONS)];
74static unsigned long o2hb_live_region_bitmap[BITS_TO_LONGS(O2NM_MAX_REGIONS)];
75static unsigned long o2hb_quorum_region_bitmap[BITS_TO_LONGS(O2NM_MAX_REGIONS)];
76static unsigned long o2hb_failed_region_bitmap[BITS_TO_LONGS(O2NM_MAX_REGIONS)];
77
78#define O2HB_DB_TYPE_LIVENODES 0
79#define O2HB_DB_TYPE_LIVEREGIONS 1
80#define O2HB_DB_TYPE_QUORUMREGIONS 2
81#define O2HB_DB_TYPE_FAILEDREGIONS 3
82#define O2HB_DB_TYPE_REGION_LIVENODES 4
83#define O2HB_DB_TYPE_REGION_NUMBER 5
84#define O2HB_DB_TYPE_REGION_ELAPSED_TIME 6
85struct o2hb_debug_buf {
86 int db_type;
87 int db_size;
88 int db_len;
89 void *db_data;
90};
91
92static struct o2hb_debug_buf *o2hb_db_livenodes;
93static struct o2hb_debug_buf *o2hb_db_liveregions;
94static struct o2hb_debug_buf *o2hb_db_quorumregions;
95static struct o2hb_debug_buf *o2hb_db_failedregions;
96
65#define O2HB_DEBUG_DIR "o2hb" 97#define O2HB_DEBUG_DIR "o2hb"
66#define O2HB_DEBUG_LIVENODES "livenodes" 98#define O2HB_DEBUG_LIVENODES "livenodes"
99#define O2HB_DEBUG_LIVEREGIONS "live_regions"
100#define O2HB_DEBUG_QUORUMREGIONS "quorum_regions"
101#define O2HB_DEBUG_FAILEDREGIONS "failed_regions"
102#define O2HB_DEBUG_REGION_NUMBER "num"
103#define O2HB_DEBUG_REGION_ELAPSED_TIME "elapsed_time_in_ms"
104
67static struct dentry *o2hb_debug_dir; 105static struct dentry *o2hb_debug_dir;
68static struct dentry *o2hb_debug_livenodes; 106static struct dentry *o2hb_debug_livenodes;
107static struct dentry *o2hb_debug_liveregions;
108static struct dentry *o2hb_debug_quorumregions;
109static struct dentry *o2hb_debug_failedregions;
69 110
70static LIST_HEAD(o2hb_all_regions); 111static LIST_HEAD(o2hb_all_regions);
71 112
@@ -77,7 +118,19 @@ static struct o2hb_callback *hbcall_from_type(enum o2hb_callback_type type);
77 118
78#define O2HB_DEFAULT_BLOCK_BITS 9 119#define O2HB_DEFAULT_BLOCK_BITS 9
79 120
121enum o2hb_heartbeat_modes {
122 O2HB_HEARTBEAT_LOCAL = 0,
123 O2HB_HEARTBEAT_GLOBAL,
124 O2HB_HEARTBEAT_NUM_MODES,
125};
126
127char *o2hb_heartbeat_mode_desc[O2HB_HEARTBEAT_NUM_MODES] = {
128 "local", /* O2HB_HEARTBEAT_LOCAL */
129 "global", /* O2HB_HEARTBEAT_GLOBAL */
130};
131
80unsigned int o2hb_dead_threshold = O2HB_DEFAULT_DEAD_THRESHOLD; 132unsigned int o2hb_dead_threshold = O2HB_DEFAULT_DEAD_THRESHOLD;
133unsigned int o2hb_heartbeat_mode = O2HB_HEARTBEAT_LOCAL;
81 134
82/* Only sets a new threshold if there are no active regions. 135/* Only sets a new threshold if there are no active regions.
83 * 136 *
@@ -94,6 +147,22 @@ static void o2hb_dead_threshold_set(unsigned int threshold)
94 } 147 }
95} 148}
96 149
150static int o2hb_global_hearbeat_mode_set(unsigned int hb_mode)
151{
152 int ret = -1;
153
154 if (hb_mode < O2HB_HEARTBEAT_NUM_MODES) {
155 spin_lock(&o2hb_live_lock);
156 if (list_empty(&o2hb_all_regions)) {
157 o2hb_heartbeat_mode = hb_mode;
158 ret = 0;
159 }
160 spin_unlock(&o2hb_live_lock);
161 }
162
163 return ret;
164}
165
97struct o2hb_node_event { 166struct o2hb_node_event {
98 struct list_head hn_item; 167 struct list_head hn_item;
99 enum o2hb_callback_type hn_event_type; 168 enum o2hb_callback_type hn_event_type;
@@ -135,6 +204,18 @@ struct o2hb_region {
135 struct block_device *hr_bdev; 204 struct block_device *hr_bdev;
136 struct o2hb_disk_slot *hr_slots; 205 struct o2hb_disk_slot *hr_slots;
137 206
207 /* live node map of this region */
208 unsigned long hr_live_node_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)];
209 unsigned int hr_region_num;
210
211 struct dentry *hr_debug_dir;
212 struct dentry *hr_debug_livenodes;
213 struct dentry *hr_debug_regnum;
214 struct dentry *hr_debug_elapsed_time;
215 struct o2hb_debug_buf *hr_db_livenodes;
216 struct o2hb_debug_buf *hr_db_regnum;
217 struct o2hb_debug_buf *hr_db_elapsed_time;
218
138 /* let the person setting up hb wait for it to return until it 219 /* let the person setting up hb wait for it to return until it
139 * has reached a 'steady' state. This will be fixed when we have 220 * has reached a 'steady' state. This will be fixed when we have
140 * a more complete api that doesn't lead to this sort of fragility. */ 221 * a more complete api that doesn't lead to this sort of fragility. */
@@ -163,8 +244,19 @@ struct o2hb_bio_wait_ctxt {
163 int wc_error; 244 int wc_error;
164}; 245};
165 246
247static int o2hb_pop_count(void *map, int count)
248{
249 int i = -1, pop = 0;
250
251 while ((i = find_next_bit(map, count, i + 1)) < count)
252 pop++;
253 return pop;
254}
255
166static void o2hb_write_timeout(struct work_struct *work) 256static void o2hb_write_timeout(struct work_struct *work)
167{ 257{
258 int failed, quorum;
259 unsigned long flags;
168 struct o2hb_region *reg = 260 struct o2hb_region *reg =
169 container_of(work, struct o2hb_region, 261 container_of(work, struct o2hb_region,
170 hr_write_timeout_work.work); 262 hr_write_timeout_work.work);
@@ -172,6 +264,28 @@ static void o2hb_write_timeout(struct work_struct *work)
172 mlog(ML_ERROR, "Heartbeat write timeout to device %s after %u " 264 mlog(ML_ERROR, "Heartbeat write timeout to device %s after %u "
173 "milliseconds\n", reg->hr_dev_name, 265 "milliseconds\n", reg->hr_dev_name,
174 jiffies_to_msecs(jiffies - reg->hr_last_timeout_start)); 266 jiffies_to_msecs(jiffies - reg->hr_last_timeout_start));
267
268 if (o2hb_global_heartbeat_active()) {
269 spin_lock_irqsave(&o2hb_live_lock, flags);
270 if (test_bit(reg->hr_region_num, o2hb_quorum_region_bitmap))
271 set_bit(reg->hr_region_num, o2hb_failed_region_bitmap);
272 failed = o2hb_pop_count(&o2hb_failed_region_bitmap,
273 O2NM_MAX_REGIONS);
274 quorum = o2hb_pop_count(&o2hb_quorum_region_bitmap,
275 O2NM_MAX_REGIONS);
276 spin_unlock_irqrestore(&o2hb_live_lock, flags);
277
278 mlog(ML_HEARTBEAT, "Number of regions %d, failed regions %d\n",
279 quorum, failed);
280
281 /*
282 * Fence if the number of failed regions >= half the number
283 * of quorum regions
284 */
285 if ((failed << 1) < quorum)
286 return;
287 }
288
175 o2quo_disk_timeout(); 289 o2quo_disk_timeout();
176} 290}
177 291
@@ -180,6 +294,11 @@ static void o2hb_arm_write_timeout(struct o2hb_region *reg)
180 mlog(ML_HEARTBEAT, "Queue write timeout for %u ms\n", 294 mlog(ML_HEARTBEAT, "Queue write timeout for %u ms\n",
181 O2HB_MAX_WRITE_TIMEOUT_MS); 295 O2HB_MAX_WRITE_TIMEOUT_MS);
182 296
297 if (o2hb_global_heartbeat_active()) {
298 spin_lock(&o2hb_live_lock);
299 clear_bit(reg->hr_region_num, o2hb_failed_region_bitmap);
300 spin_unlock(&o2hb_live_lock);
301 }
183 cancel_delayed_work(&reg->hr_write_timeout_work); 302 cancel_delayed_work(&reg->hr_write_timeout_work);
184 reg->hr_last_timeout_start = jiffies; 303 reg->hr_last_timeout_start = jiffies;
185 schedule_delayed_work(&reg->hr_write_timeout_work, 304 schedule_delayed_work(&reg->hr_write_timeout_work,
@@ -513,6 +632,8 @@ static void o2hb_queue_node_event(struct o2hb_node_event *event,
513{ 632{
514 assert_spin_locked(&o2hb_live_lock); 633 assert_spin_locked(&o2hb_live_lock);
515 634
635 BUG_ON((!node) && (type != O2HB_NODE_DOWN_CB));
636
516 event->hn_event_type = type; 637 event->hn_event_type = type;
517 event->hn_node = node; 638 event->hn_node = node;
518 event->hn_node_num = node_num; 639 event->hn_node_num = node_num;
@@ -554,6 +675,35 @@ static void o2hb_shutdown_slot(struct o2hb_disk_slot *slot)
554 o2nm_node_put(node); 675 o2nm_node_put(node);
555} 676}
556 677
678static void o2hb_set_quorum_device(struct o2hb_region *reg,
679 struct o2hb_disk_slot *slot)
680{
681 assert_spin_locked(&o2hb_live_lock);
682
683 if (!o2hb_global_heartbeat_active())
684 return;
685
686 if (test_bit(reg->hr_region_num, o2hb_quorum_region_bitmap))
687 return;
688
689 /*
690 * A region can be added to the quorum only when it sees all
691 * live nodes heartbeat on it. In other words, the region has been
692 * added to all nodes.
693 */
694 if (memcmp(reg->hr_live_node_bitmap, o2hb_live_node_bitmap,
695 sizeof(o2hb_live_node_bitmap)))
696 return;
697
698 if (slot->ds_changed_samples < O2HB_LIVE_THRESHOLD)
699 return;
700
701 printk(KERN_NOTICE "o2hb: Region %s is now a quorum device\n",
702 config_item_name(&reg->hr_item));
703
704 set_bit(reg->hr_region_num, o2hb_quorum_region_bitmap);
705}
706
557static int o2hb_check_slot(struct o2hb_region *reg, 707static int o2hb_check_slot(struct o2hb_region *reg,
558 struct o2hb_disk_slot *slot) 708 struct o2hb_disk_slot *slot)
559{ 709{
@@ -565,14 +715,22 @@ static int o2hb_check_slot(struct o2hb_region *reg,
565 u64 cputime; 715 u64 cputime;
566 unsigned int dead_ms = o2hb_dead_threshold * O2HB_REGION_TIMEOUT_MS; 716 unsigned int dead_ms = o2hb_dead_threshold * O2HB_REGION_TIMEOUT_MS;
567 unsigned int slot_dead_ms; 717 unsigned int slot_dead_ms;
718 int tmp;
568 719
569 memcpy(hb_block, slot->ds_raw_block, reg->hr_block_bytes); 720 memcpy(hb_block, slot->ds_raw_block, reg->hr_block_bytes);
570 721
571 /* Is this correct? Do we assume that the node doesn't exist 722 /*
572 * if we're not configured for him? */ 723 * If a node is no longer configured but is still in the livemap, we
724 * may need to clear that bit from the livemap.
725 */
573 node = o2nm_get_node_by_num(slot->ds_node_num); 726 node = o2nm_get_node_by_num(slot->ds_node_num);
574 if (!node) 727 if (!node) {
575 return 0; 728 spin_lock(&o2hb_live_lock);
729 tmp = test_bit(slot->ds_node_num, o2hb_live_node_bitmap);
730 spin_unlock(&o2hb_live_lock);
731 if (!tmp)
732 return 0;
733 }
576 734
577 if (!o2hb_verify_crc(reg, hb_block)) { 735 if (!o2hb_verify_crc(reg, hb_block)) {
578 /* all paths from here will drop o2hb_live_lock for 736 /* all paths from here will drop o2hb_live_lock for
@@ -639,8 +797,12 @@ fire_callbacks:
639 mlog(ML_HEARTBEAT, "Node %d (id 0x%llx) joined my region\n", 797 mlog(ML_HEARTBEAT, "Node %d (id 0x%llx) joined my region\n",
640 slot->ds_node_num, (long long)slot->ds_last_generation); 798 slot->ds_node_num, (long long)slot->ds_last_generation);
641 799
800 set_bit(slot->ds_node_num, reg->hr_live_node_bitmap);
801
642 /* first on the list generates a callback */ 802 /* first on the list generates a callback */
643 if (list_empty(&o2hb_live_slots[slot->ds_node_num])) { 803 if (list_empty(&o2hb_live_slots[slot->ds_node_num])) {
804 mlog(ML_HEARTBEAT, "o2hb: Add node %d to live nodes "
805 "bitmap\n", slot->ds_node_num);
644 set_bit(slot->ds_node_num, o2hb_live_node_bitmap); 806 set_bit(slot->ds_node_num, o2hb_live_node_bitmap);
645 807
646 o2hb_queue_node_event(&event, O2HB_NODE_UP_CB, node, 808 o2hb_queue_node_event(&event, O2HB_NODE_UP_CB, node,
@@ -684,13 +846,18 @@ fire_callbacks:
684 mlog(ML_HEARTBEAT, "Node %d left my region\n", 846 mlog(ML_HEARTBEAT, "Node %d left my region\n",
685 slot->ds_node_num); 847 slot->ds_node_num);
686 848
849 clear_bit(slot->ds_node_num, reg->hr_live_node_bitmap);
850
687 /* last off the live_slot generates a callback */ 851 /* last off the live_slot generates a callback */
688 list_del_init(&slot->ds_live_item); 852 list_del_init(&slot->ds_live_item);
689 if (list_empty(&o2hb_live_slots[slot->ds_node_num])) { 853 if (list_empty(&o2hb_live_slots[slot->ds_node_num])) {
854 mlog(ML_HEARTBEAT, "o2hb: Remove node %d from live "
855 "nodes bitmap\n", slot->ds_node_num);
690 clear_bit(slot->ds_node_num, o2hb_live_node_bitmap); 856 clear_bit(slot->ds_node_num, o2hb_live_node_bitmap);
691 857
692 o2hb_queue_node_event(&event, O2HB_NODE_DOWN_CB, node, 858 /* node can be null */
693 slot->ds_node_num); 859 o2hb_queue_node_event(&event, O2HB_NODE_DOWN_CB,
860 node, slot->ds_node_num);
694 861
695 changed = 1; 862 changed = 1;
696 } 863 }
@@ -706,11 +873,14 @@ fire_callbacks:
706 slot->ds_equal_samples = 0; 873 slot->ds_equal_samples = 0;
707 } 874 }
708out: 875out:
876 o2hb_set_quorum_device(reg, slot);
877
709 spin_unlock(&o2hb_live_lock); 878 spin_unlock(&o2hb_live_lock);
710 879
711 o2hb_run_event_list(&event); 880 o2hb_run_event_list(&event);
712 881
713 o2nm_node_put(node); 882 if (node)
883 o2nm_node_put(node);
714 return changed; 884 return changed;
715} 885}
716 886
@@ -737,6 +907,7 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg)
737{ 907{
738 int i, ret, highest_node, change = 0; 908 int i, ret, highest_node, change = 0;
739 unsigned long configured_nodes[BITS_TO_LONGS(O2NM_MAX_NODES)]; 909 unsigned long configured_nodes[BITS_TO_LONGS(O2NM_MAX_NODES)];
910 unsigned long live_node_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)];
740 struct o2hb_bio_wait_ctxt write_wc; 911 struct o2hb_bio_wait_ctxt write_wc;
741 912
742 ret = o2nm_configured_node_map(configured_nodes, 913 ret = o2nm_configured_node_map(configured_nodes,
@@ -746,6 +917,17 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg)
746 return ret; 917 return ret;
747 } 918 }
748 919
920 /*
921 * If a node is not configured but is in the livemap, we still need
922 * to read the slot so as to be able to remove it from the livemap.
923 */
924 o2hb_fill_node_map(live_node_bitmap, sizeof(live_node_bitmap));
925 i = -1;
926 while ((i = find_next_bit(live_node_bitmap,
927 O2NM_MAX_NODES, i + 1)) < O2NM_MAX_NODES) {
928 set_bit(i, configured_nodes);
929 }
930
749 highest_node = o2hb_highest_node(configured_nodes, O2NM_MAX_NODES); 931 highest_node = o2hb_highest_node(configured_nodes, O2NM_MAX_NODES);
750 if (highest_node >= O2NM_MAX_NODES) { 932 if (highest_node >= O2NM_MAX_NODES) {
751 mlog(ML_NOTICE, "ocfs2_heartbeat: no configured nodes found!\n"); 933 mlog(ML_NOTICE, "ocfs2_heartbeat: no configured nodes found!\n");
@@ -917,21 +1099,59 @@ static int o2hb_thread(void *data)
917#ifdef CONFIG_DEBUG_FS 1099#ifdef CONFIG_DEBUG_FS
918static int o2hb_debug_open(struct inode *inode, struct file *file) 1100static int o2hb_debug_open(struct inode *inode, struct file *file)
919{ 1101{
1102 struct o2hb_debug_buf *db = inode->i_private;
1103 struct o2hb_region *reg;
920 unsigned long map[BITS_TO_LONGS(O2NM_MAX_NODES)]; 1104 unsigned long map[BITS_TO_LONGS(O2NM_MAX_NODES)];
921 char *buf = NULL; 1105 char *buf = NULL;
922 int i = -1; 1106 int i = -1;
923 int out = 0; 1107 int out = 0;
924 1108
1109 /* max_nodes should be the largest bitmap we pass here */
1110 BUG_ON(sizeof(map) < db->db_size);
1111
925 buf = kmalloc(PAGE_SIZE, GFP_KERNEL); 1112 buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
926 if (!buf) 1113 if (!buf)
927 goto bail; 1114 goto bail;
928 1115
929 o2hb_fill_node_map(map, sizeof(map)); 1116 switch (db->db_type) {
1117 case O2HB_DB_TYPE_LIVENODES:
1118 case O2HB_DB_TYPE_LIVEREGIONS:
1119 case O2HB_DB_TYPE_QUORUMREGIONS:
1120 case O2HB_DB_TYPE_FAILEDREGIONS:
1121 spin_lock(&o2hb_live_lock);
1122 memcpy(map, db->db_data, db->db_size);
1123 spin_unlock(&o2hb_live_lock);
1124 break;
1125
1126 case O2HB_DB_TYPE_REGION_LIVENODES:
1127 spin_lock(&o2hb_live_lock);
1128 reg = (struct o2hb_region *)db->db_data;
1129 memcpy(map, reg->hr_live_node_bitmap, db->db_size);
1130 spin_unlock(&o2hb_live_lock);
1131 break;
1132
1133 case O2HB_DB_TYPE_REGION_NUMBER:
1134 reg = (struct o2hb_region *)db->db_data;
1135 out += snprintf(buf + out, PAGE_SIZE - out, "%d\n",
1136 reg->hr_region_num);
1137 goto done;
1138
1139 case O2HB_DB_TYPE_REGION_ELAPSED_TIME:
1140 reg = (struct o2hb_region *)db->db_data;
1141 out += snprintf(buf + out, PAGE_SIZE - out, "%u\n",
1142 jiffies_to_msecs(jiffies -
1143 reg->hr_last_timeout_start));
1144 goto done;
1145
1146 default:
1147 goto done;
1148 }
930 1149
931 while ((i = find_next_bit(map, O2NM_MAX_NODES, i + 1)) < O2NM_MAX_NODES) 1150 while ((i = find_next_bit(map, db->db_len, i + 1)) < db->db_len)
932 out += snprintf(buf + out, PAGE_SIZE - out, "%d ", i); 1151 out += snprintf(buf + out, PAGE_SIZE - out, "%d ", i);
933 out += snprintf(buf + out, PAGE_SIZE - out, "\n"); 1152 out += snprintf(buf + out, PAGE_SIZE - out, "\n");
934 1153
1154done:
935 i_size_write(inode, out); 1155 i_size_write(inode, out);
936 1156
937 file->private_data = buf; 1157 file->private_data = buf;
@@ -978,10 +1198,104 @@ static const struct file_operations o2hb_debug_fops = {
978 1198
979void o2hb_exit(void) 1199void o2hb_exit(void)
980{ 1200{
981 if (o2hb_debug_livenodes) 1201 kfree(o2hb_db_livenodes);
982 debugfs_remove(o2hb_debug_livenodes); 1202 kfree(o2hb_db_liveregions);
983 if (o2hb_debug_dir) 1203 kfree(o2hb_db_quorumregions);
984 debugfs_remove(o2hb_debug_dir); 1204 kfree(o2hb_db_failedregions);
1205 debugfs_remove(o2hb_debug_failedregions);
1206 debugfs_remove(o2hb_debug_quorumregions);
1207 debugfs_remove(o2hb_debug_liveregions);
1208 debugfs_remove(o2hb_debug_livenodes);
1209 debugfs_remove(o2hb_debug_dir);
1210}
1211
1212static struct dentry *o2hb_debug_create(const char *name, struct dentry *dir,
1213 struct o2hb_debug_buf **db, int db_len,
1214 int type, int size, int len, void *data)
1215{
1216 *db = kmalloc(db_len, GFP_KERNEL);
1217 if (!*db)
1218 return NULL;
1219
1220 (*db)->db_type = type;
1221 (*db)->db_size = size;
1222 (*db)->db_len = len;
1223 (*db)->db_data = data;
1224
1225 return debugfs_create_file(name, S_IFREG|S_IRUSR, dir, *db,
1226 &o2hb_debug_fops);
1227}
1228
1229static int o2hb_debug_init(void)
1230{
1231 int ret = -ENOMEM;
1232
1233 o2hb_debug_dir = debugfs_create_dir(O2HB_DEBUG_DIR, NULL);
1234 if (!o2hb_debug_dir) {
1235 mlog_errno(ret);
1236 goto bail;
1237 }
1238
1239 o2hb_debug_livenodes = o2hb_debug_create(O2HB_DEBUG_LIVENODES,
1240 o2hb_debug_dir,
1241 &o2hb_db_livenodes,
1242 sizeof(*o2hb_db_livenodes),
1243 O2HB_DB_TYPE_LIVENODES,
1244 sizeof(o2hb_live_node_bitmap),
1245 O2NM_MAX_NODES,
1246 o2hb_live_node_bitmap);
1247 if (!o2hb_debug_livenodes) {
1248 mlog_errno(ret);
1249 goto bail;
1250 }
1251
1252 o2hb_debug_liveregions = o2hb_debug_create(O2HB_DEBUG_LIVEREGIONS,
1253 o2hb_debug_dir,
1254 &o2hb_db_liveregions,
1255 sizeof(*o2hb_db_liveregions),
1256 O2HB_DB_TYPE_LIVEREGIONS,
1257 sizeof(o2hb_live_region_bitmap),
1258 O2NM_MAX_REGIONS,
1259 o2hb_live_region_bitmap);
1260 if (!o2hb_debug_liveregions) {
1261 mlog_errno(ret);
1262 goto bail;
1263 }
1264
1265 o2hb_debug_quorumregions =
1266 o2hb_debug_create(O2HB_DEBUG_QUORUMREGIONS,
1267 o2hb_debug_dir,
1268 &o2hb_db_quorumregions,
1269 sizeof(*o2hb_db_quorumregions),
1270 O2HB_DB_TYPE_QUORUMREGIONS,
1271 sizeof(o2hb_quorum_region_bitmap),
1272 O2NM_MAX_REGIONS,
1273 o2hb_quorum_region_bitmap);
1274 if (!o2hb_debug_quorumregions) {
1275 mlog_errno(ret);
1276 goto bail;
1277 }
1278
1279 o2hb_debug_failedregions =
1280 o2hb_debug_create(O2HB_DEBUG_FAILEDREGIONS,
1281 o2hb_debug_dir,
1282 &o2hb_db_failedregions,
1283 sizeof(*o2hb_db_failedregions),
1284 O2HB_DB_TYPE_FAILEDREGIONS,
1285 sizeof(o2hb_failed_region_bitmap),
1286 O2NM_MAX_REGIONS,
1287 o2hb_failed_region_bitmap);
1288 if (!o2hb_debug_failedregions) {
1289 mlog_errno(ret);
1290 goto bail;
1291 }
1292
1293 ret = 0;
1294bail:
1295 if (ret)
1296 o2hb_exit();
1297
1298 return ret;
985} 1299}
986 1300
987int o2hb_init(void) 1301int o2hb_init(void)
@@ -997,24 +1311,12 @@ int o2hb_init(void)
997 INIT_LIST_HEAD(&o2hb_node_events); 1311 INIT_LIST_HEAD(&o2hb_node_events);
998 1312
999 memset(o2hb_live_node_bitmap, 0, sizeof(o2hb_live_node_bitmap)); 1313 memset(o2hb_live_node_bitmap, 0, sizeof(o2hb_live_node_bitmap));
1314 memset(o2hb_region_bitmap, 0, sizeof(o2hb_region_bitmap));
1315 memset(o2hb_live_region_bitmap, 0, sizeof(o2hb_live_region_bitmap));
1316 memset(o2hb_quorum_region_bitmap, 0, sizeof(o2hb_quorum_region_bitmap));
1317 memset(o2hb_failed_region_bitmap, 0, sizeof(o2hb_failed_region_bitmap));
1000 1318
1001 o2hb_debug_dir = debugfs_create_dir(O2HB_DEBUG_DIR, NULL); 1319 return o2hb_debug_init();
1002 if (!o2hb_debug_dir) {
1003 mlog_errno(-ENOMEM);
1004 return -ENOMEM;
1005 }
1006
1007 o2hb_debug_livenodes = debugfs_create_file(O2HB_DEBUG_LIVENODES,
1008 S_IFREG|S_IRUSR,
1009 o2hb_debug_dir, NULL,
1010 &o2hb_debug_fops);
1011 if (!o2hb_debug_livenodes) {
1012 mlog_errno(-ENOMEM);
1013 debugfs_remove(o2hb_debug_dir);
1014 return -ENOMEM;
1015 }
1016
1017 return 0;
1018} 1320}
1019 1321
1020/* if we're already in a callback then we're already serialized by the sem */ 1322/* if we're already in a callback then we're already serialized by the sem */
@@ -1078,6 +1380,13 @@ static void o2hb_region_release(struct config_item *item)
1078 if (reg->hr_slots) 1380 if (reg->hr_slots)
1079 kfree(reg->hr_slots); 1381 kfree(reg->hr_slots);
1080 1382
1383 kfree(reg->hr_db_regnum);
1384 kfree(reg->hr_db_livenodes);
1385 debugfs_remove(reg->hr_debug_livenodes);
1386 debugfs_remove(reg->hr_debug_regnum);
1387 debugfs_remove(reg->hr_debug_elapsed_time);
1388 debugfs_remove(reg->hr_debug_dir);
1389
1081 spin_lock(&o2hb_live_lock); 1390 spin_lock(&o2hb_live_lock);
1082 list_del(&reg->hr_all_item); 1391 list_del(&reg->hr_all_item);
1083 spin_unlock(&o2hb_live_lock); 1392 spin_unlock(&o2hb_live_lock);
@@ -1441,6 +1750,8 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,
1441 /* Ok, we were woken. Make sure it wasn't by drop_item() */ 1750 /* Ok, we were woken. Make sure it wasn't by drop_item() */
1442 spin_lock(&o2hb_live_lock); 1751 spin_lock(&o2hb_live_lock);
1443 hb_task = reg->hr_task; 1752 hb_task = reg->hr_task;
1753 if (o2hb_global_heartbeat_active())
1754 set_bit(reg->hr_region_num, o2hb_live_region_bitmap);
1444 spin_unlock(&o2hb_live_lock); 1755 spin_unlock(&o2hb_live_lock);
1445 1756
1446 if (hb_task) 1757 if (hb_task)
@@ -1448,6 +1759,10 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,
1448 else 1759 else
1449 ret = -EIO; 1760 ret = -EIO;
1450 1761
1762 if (hb_task && o2hb_global_heartbeat_active())
1763 printk(KERN_NOTICE "o2hb: Heartbeat started on region %s\n",
1764 config_item_name(&reg->hr_item));
1765
1451out: 1766out:
1452 if (filp) 1767 if (filp)
1453 fput(filp); 1768 fput(filp);
@@ -1586,21 +1901,94 @@ static struct o2hb_heartbeat_group *to_o2hb_heartbeat_group(struct config_group
1586 : NULL; 1901 : NULL;
1587} 1902}
1588 1903
1904static int o2hb_debug_region_init(struct o2hb_region *reg, struct dentry *dir)
1905{
1906 int ret = -ENOMEM;
1907
1908 reg->hr_debug_dir =
1909 debugfs_create_dir(config_item_name(&reg->hr_item), dir);
1910 if (!reg->hr_debug_dir) {
1911 mlog_errno(ret);
1912 goto bail;
1913 }
1914
1915 reg->hr_debug_livenodes =
1916 o2hb_debug_create(O2HB_DEBUG_LIVENODES,
1917 reg->hr_debug_dir,
1918 &(reg->hr_db_livenodes),
1919 sizeof(*(reg->hr_db_livenodes)),
1920 O2HB_DB_TYPE_REGION_LIVENODES,
1921 sizeof(reg->hr_live_node_bitmap),
1922 O2NM_MAX_NODES, reg);
1923 if (!reg->hr_debug_livenodes) {
1924 mlog_errno(ret);
1925 goto bail;
1926 }
1927
1928 reg->hr_debug_regnum =
1929 o2hb_debug_create(O2HB_DEBUG_REGION_NUMBER,
1930 reg->hr_debug_dir,
1931 &(reg->hr_db_regnum),
1932 sizeof(*(reg->hr_db_regnum)),
1933 O2HB_DB_TYPE_REGION_NUMBER,
1934 0, O2NM_MAX_NODES, reg);
1935 if (!reg->hr_debug_regnum) {
1936 mlog_errno(ret);
1937 goto bail;
1938 }
1939
1940 reg->hr_debug_elapsed_time =
1941 o2hb_debug_create(O2HB_DEBUG_REGION_ELAPSED_TIME,
1942 reg->hr_debug_dir,
1943 &(reg->hr_db_elapsed_time),
1944 sizeof(*(reg->hr_db_elapsed_time)),
1945 O2HB_DB_TYPE_REGION_ELAPSED_TIME,
1946 0, 0, reg);
1947 if (!reg->hr_debug_elapsed_time) {
1948 mlog_errno(ret);
1949 goto bail;
1950 }
1951
1952 ret = 0;
1953bail:
1954 return ret;
1955}
1956
1589static struct config_item *o2hb_heartbeat_group_make_item(struct config_group *group, 1957static struct config_item *o2hb_heartbeat_group_make_item(struct config_group *group,
1590 const char *name) 1958 const char *name)
1591{ 1959{
1592 struct o2hb_region *reg = NULL; 1960 struct o2hb_region *reg = NULL;
1961 int ret;
1593 1962
1594 reg = kzalloc(sizeof(struct o2hb_region), GFP_KERNEL); 1963 reg = kzalloc(sizeof(struct o2hb_region), GFP_KERNEL);
1595 if (reg == NULL) 1964 if (reg == NULL)
1596 return ERR_PTR(-ENOMEM); 1965 return ERR_PTR(-ENOMEM);
1597 1966
1598 config_item_init_type_name(&reg->hr_item, name, &o2hb_region_type); 1967 if (strlen(name) > O2HB_MAX_REGION_NAME_LEN)
1968 return ERR_PTR(-ENAMETOOLONG);
1599 1969
1600 spin_lock(&o2hb_live_lock); 1970 spin_lock(&o2hb_live_lock);
1971 reg->hr_region_num = 0;
1972 if (o2hb_global_heartbeat_active()) {
1973 reg->hr_region_num = find_first_zero_bit(o2hb_region_bitmap,
1974 O2NM_MAX_REGIONS);
1975 if (reg->hr_region_num >= O2NM_MAX_REGIONS) {
1976 spin_unlock(&o2hb_live_lock);
1977 return ERR_PTR(-EFBIG);
1978 }
1979 set_bit(reg->hr_region_num, o2hb_region_bitmap);
1980 }
1601 list_add_tail(&reg->hr_all_item, &o2hb_all_regions); 1981 list_add_tail(&reg->hr_all_item, &o2hb_all_regions);
1602 spin_unlock(&o2hb_live_lock); 1982 spin_unlock(&o2hb_live_lock);
1603 1983
1984 config_item_init_type_name(&reg->hr_item, name, &o2hb_region_type);
1985
1986 ret = o2hb_debug_region_init(reg, o2hb_debug_dir);
1987 if (ret) {
1988 config_item_put(&reg->hr_item);
1989 return ERR_PTR(ret);
1990 }
1991
1604 return &reg->hr_item; 1992 return &reg->hr_item;
1605} 1993}
1606 1994
@@ -1612,6 +2000,10 @@ static void o2hb_heartbeat_group_drop_item(struct config_group *group,
1612 2000
1613 /* stop the thread when the user removes the region dir */ 2001 /* stop the thread when the user removes the region dir */
1614 spin_lock(&o2hb_live_lock); 2002 spin_lock(&o2hb_live_lock);
2003 if (o2hb_global_heartbeat_active()) {
2004 clear_bit(reg->hr_region_num, o2hb_region_bitmap);
2005 clear_bit(reg->hr_region_num, o2hb_live_region_bitmap);
2006 }
1615 hb_task = reg->hr_task; 2007 hb_task = reg->hr_task;
1616 reg->hr_task = NULL; 2008 reg->hr_task = NULL;
1617 spin_unlock(&o2hb_live_lock); 2009 spin_unlock(&o2hb_live_lock);
@@ -1628,6 +2020,9 @@ static void o2hb_heartbeat_group_drop_item(struct config_group *group,
1628 wake_up(&o2hb_steady_queue); 2020 wake_up(&o2hb_steady_queue);
1629 } 2021 }
1630 2022
2023 if (o2hb_global_heartbeat_active())
2024 printk(KERN_NOTICE "o2hb: Heartbeat stopped on region %s\n",
2025 config_item_name(&reg->hr_item));
1631 config_item_put(item); 2026 config_item_put(item);
1632} 2027}
1633 2028
@@ -1688,6 +2083,41 @@ static ssize_t o2hb_heartbeat_group_threshold_store(struct o2hb_heartbeat_group
1688 return count; 2083 return count;
1689} 2084}
1690 2085
2086static
2087ssize_t o2hb_heartbeat_group_mode_show(struct o2hb_heartbeat_group *group,
2088 char *page)
2089{
2090 return sprintf(page, "%s\n",
2091 o2hb_heartbeat_mode_desc[o2hb_heartbeat_mode]);
2092}
2093
2094static
2095ssize_t o2hb_heartbeat_group_mode_store(struct o2hb_heartbeat_group *group,
2096 const char *page, size_t count)
2097{
2098 unsigned int i;
2099 int ret;
2100 size_t len;
2101
2102 len = (page[count - 1] == '\n') ? count - 1 : count;
2103 if (!len)
2104 return -EINVAL;
2105
2106 for (i = 0; i < O2HB_HEARTBEAT_NUM_MODES; ++i) {
2107 if (strnicmp(page, o2hb_heartbeat_mode_desc[i], len))
2108 continue;
2109
2110 ret = o2hb_global_hearbeat_mode_set(i);
2111 if (!ret)
2112 printk(KERN_NOTICE "o2hb: Heartbeat mode set to %s\n",
2113 o2hb_heartbeat_mode_desc[i]);
2114 return count;
2115 }
2116
2117 return -EINVAL;
2118
2119}
2120
1691static struct o2hb_heartbeat_group_attribute o2hb_heartbeat_group_attr_threshold = { 2121static struct o2hb_heartbeat_group_attribute o2hb_heartbeat_group_attr_threshold = {
1692 .attr = { .ca_owner = THIS_MODULE, 2122 .attr = { .ca_owner = THIS_MODULE,
1693 .ca_name = "dead_threshold", 2123 .ca_name = "dead_threshold",
@@ -1696,8 +2126,17 @@ static struct o2hb_heartbeat_group_attribute o2hb_heartbeat_group_attr_threshold
1696 .store = o2hb_heartbeat_group_threshold_store, 2126 .store = o2hb_heartbeat_group_threshold_store,
1697}; 2127};
1698 2128
/*
 * configfs attribute "mode" of the heartbeat group: readable by everyone,
 * writable by the owner.  Reads are served by
 * o2hb_heartbeat_group_mode_show() and writes by
 * o2hb_heartbeat_group_mode_store().
 */
2129static struct o2hb_heartbeat_group_attribute o2hb_heartbeat_group_attr_mode = {
2130 .attr = { .ca_owner = THIS_MODULE,
2131 .ca_name = "mode",
2132 .ca_mode = S_IRUGO | S_IWUSR },
2133 .show = o2hb_heartbeat_group_mode_show,
2134 .store = o2hb_heartbeat_group_mode_store,
2135};
2136
1699static struct configfs_attribute *o2hb_heartbeat_group_attrs[] = { 2137static struct configfs_attribute *o2hb_heartbeat_group_attrs[] = {
1700 &o2hb_heartbeat_group_attr_threshold.attr, 2138 &o2hb_heartbeat_group_attr_threshold.attr,
2139 &o2hb_heartbeat_group_attr_mode.attr,
1701 NULL, 2140 NULL,
1702}; 2141};
1703 2142
@@ -1963,3 +2402,34 @@ void o2hb_stop_all_regions(void)
1963 spin_unlock(&o2hb_live_lock); 2402 spin_unlock(&o2hb_live_lock);
1964} 2403}
1965EXPORT_SYMBOL_GPL(o2hb_stop_all_regions); 2404EXPORT_SYMBOL_GPL(o2hb_stop_all_regions);
2405
2406int o2hb_get_all_regions(char *region_uuids, u8 max_regions)
2407{
2408 struct o2hb_region *reg;
2409 int numregs = 0;
2410 char *p;
2411
2412 spin_lock(&o2hb_live_lock);
2413
2414 p = region_uuids;
2415 list_for_each_entry(reg, &o2hb_all_regions, hr_all_item) {
2416 mlog(0, "Region: %s\n", config_item_name(&reg->hr_item));
2417 if (numregs < max_regions) {
2418 memcpy(p, config_item_name(&reg->hr_item),
2419 O2HB_MAX_REGION_NAME_LEN);
2420 p += O2HB_MAX_REGION_NAME_LEN;
2421 }
2422 numregs++;
2423 }
2424
2425 spin_unlock(&o2hb_live_lock);
2426
2427 return numregs;
2428}
2429EXPORT_SYMBOL_GPL(o2hb_get_all_regions);
2430
2431int o2hb_global_heartbeat_active(void)
2432{
2433 return (o2hb_heartbeat_mode == O2HB_HEARTBEAT_GLOBAL);
2434}
2435EXPORT_SYMBOL(o2hb_global_heartbeat_active);
diff --git a/fs/ocfs2/cluster/heartbeat.h b/fs/ocfs2/cluster/heartbeat.h
index 2f1649253b49..00ad8e8fea51 100644
--- a/fs/ocfs2/cluster/heartbeat.h
+++ b/fs/ocfs2/cluster/heartbeat.h
@@ -31,6 +31,8 @@
31 31
32#define O2HB_REGION_TIMEOUT_MS 2000 32#define O2HB_REGION_TIMEOUT_MS 2000
33 33
34#define O2HB_MAX_REGION_NAME_LEN 32
35
34/* number of changes to be seen as live */ 36/* number of changes to be seen as live */
35#define O2HB_LIVE_THRESHOLD 2 37#define O2HB_LIVE_THRESHOLD 2
36/* number of equal samples to be seen as dead */ 38/* number of equal samples to be seen as dead */
@@ -81,5 +83,7 @@ int o2hb_check_node_heartbeating(u8 node_num);
81int o2hb_check_node_heartbeating_from_callback(u8 node_num); 83int o2hb_check_node_heartbeating_from_callback(u8 node_num);
82int o2hb_check_local_node_heartbeating(void); 84int o2hb_check_local_node_heartbeating(void);
83void o2hb_stop_all_regions(void); 85void o2hb_stop_all_regions(void);
86int o2hb_get_all_regions(char *region_uuids, u8 numregions);
87int o2hb_global_heartbeat_active(void);
84 88
85#endif /* O2CLUSTER_HEARTBEAT_H */ 89#endif /* O2CLUSTER_HEARTBEAT_H */
diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h
index fd96e2a2fa56..ea2ed9f56c94 100644
--- a/fs/ocfs2/cluster/masklog.h
+++ b/fs/ocfs2/cluster/masklog.h
@@ -119,7 +119,8 @@
119#define ML_ERROR 0x0000000100000000ULL /* sent to KERN_ERR */ 119#define ML_ERROR 0x0000000100000000ULL /* sent to KERN_ERR */
120#define ML_NOTICE 0x0000000200000000ULL /* setn to KERN_NOTICE */ 120#define ML_NOTICE 0x0000000200000000ULL /* setn to KERN_NOTICE */
121#define ML_KTHREAD 0x0000000400000000ULL /* kernel thread activity */ 121#define ML_KTHREAD 0x0000000400000000ULL /* kernel thread activity */
122#define ML_RESERVATIONS 0x0000000800000000ULL /* ocfs2 alloc reservations */ 122#define ML_RESERVATIONS 0x0000000800000000ULL /* ocfs2 alloc reservations */
123#define ML_CLUSTER 0x0000001000000000ULL /* cluster stack */
123 124
124#define MLOG_INITIAL_AND_MASK (ML_ERROR|ML_NOTICE) 125#define MLOG_INITIAL_AND_MASK (ML_ERROR|ML_NOTICE)
125#define MLOG_INITIAL_NOT_MASK (ML_ENTRY|ML_EXIT) 126#define MLOG_INITIAL_NOT_MASK (ML_ENTRY|ML_EXIT)
diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c
index ed0c9f367fed..bb240647ca5f 100644
--- a/fs/ocfs2/cluster/nodemanager.c
+++ b/fs/ocfs2/cluster/nodemanager.c
@@ -711,6 +711,8 @@ static struct config_item *o2nm_node_group_make_item(struct config_group *group,
711 config_item_init_type_name(&node->nd_item, name, &o2nm_node_type); 711 config_item_init_type_name(&node->nd_item, name, &o2nm_node_type);
712 spin_lock_init(&node->nd_lock); 712 spin_lock_init(&node->nd_lock);
713 713
714 mlog(ML_CLUSTER, "o2nm: Registering node %s\n", name);
715
714 return &node->nd_item; 716 return &node->nd_item;
715} 717}
716 718
@@ -744,6 +746,9 @@ static void o2nm_node_group_drop_item(struct config_group *group,
744 } 746 }
745 write_unlock(&cluster->cl_nodes_lock); 747 write_unlock(&cluster->cl_nodes_lock);
746 748
749 mlog(ML_CLUSTER, "o2nm: Unregistered node %s\n",
750 config_item_name(&node->nd_item));
751
747 config_item_put(item); 752 config_item_put(item);
748} 753}
749 754
diff --git a/fs/ocfs2/cluster/ocfs2_nodemanager.h b/fs/ocfs2/cluster/ocfs2_nodemanager.h
index 5b9854bad571..49b594325bec 100644
--- a/fs/ocfs2/cluster/ocfs2_nodemanager.h
+++ b/fs/ocfs2/cluster/ocfs2_nodemanager.h
@@ -36,4 +36,10 @@
36/* host name, group name, cluster name all 64 bytes */ 36/* host name, group name, cluster name all 64 bytes */
37#define O2NM_MAX_NAME_LEN 64 // __NEW_UTS_LEN 37#define O2NM_MAX_NAME_LEN 64 // __NEW_UTS_LEN
38 38
39/*
40 * Maximum number of global heartbeat regions allowed.
41 * **CAUTION** Changing this number will break dlm compatibility.
42 */
43#define O2NM_MAX_REGIONS 32
44
39#endif /* _OCFS2_NODEMANAGER_H */ 45#endif /* _OCFS2_NODEMANAGER_H */
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index 1361997cf205..9aa426e42123 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -977,7 +977,7 @@ static int o2net_tx_can_proceed(struct o2net_node *nn,
977int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *caller_vec, 977int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *caller_vec,
978 size_t caller_veclen, u8 target_node, int *status) 978 size_t caller_veclen, u8 target_node, int *status)
979{ 979{
980 int ret; 980 int ret = 0;
981 struct o2net_msg *msg = NULL; 981 struct o2net_msg *msg = NULL;
982 size_t veclen, caller_bytes = 0; 982 size_t veclen, caller_bytes = 0;
983 struct kvec *vec = NULL; 983 struct kvec *vec = NULL;
@@ -1696,6 +1696,9 @@ static void o2net_hb_node_down_cb(struct o2nm_node *node, int node_num,
1696{ 1696{
1697 o2quo_hb_down(node_num); 1697 o2quo_hb_down(node_num);
1698 1698
1699 if (!node)
1700 return;
1701
1699 if (node_num != o2nm_this_node()) 1702 if (node_num != o2nm_this_node())
1700 o2net_disconnect_node(node); 1703 o2net_disconnect_node(node);
1701 1704
@@ -1709,6 +1712,8 @@ static void o2net_hb_node_up_cb(struct o2nm_node *node, int node_num,
1709 1712
1710 o2quo_hb_up(node_num); 1713 o2quo_hb_up(node_num);
1711 1714
1715 BUG_ON(!node);
1716
1712 /* ensure an immediate connect attempt */ 1717 /* ensure an immediate connect attempt */
1713 nn->nn_last_connect_attempt = jiffies - 1718 nn->nn_last_connect_attempt = jiffies -
1714 (msecs_to_jiffies(o2net_reconnect_delay()) + 1); 1719 (msecs_to_jiffies(o2net_reconnect_delay()) + 1);
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index f04ebcfffc4a..c49f6de0e7ab 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -3931,6 +3931,15 @@ static int ocfs2_dx_dir_rebalance(struct ocfs2_super *osb, struct inode *dir,
3931 goto out_commit; 3931 goto out_commit;
3932 } 3932 }
3933 3933
3934 cpos = split_hash;
3935 ret = ocfs2_dx_dir_new_cluster(dir, &et, cpos, handle,
3936 data_ac, meta_ac, new_dx_leaves,
3937 num_dx_leaves);
3938 if (ret) {
3939 mlog_errno(ret);
3940 goto out_commit;
3941 }
3942
3934 for (i = 0; i < num_dx_leaves; i++) { 3943 for (i = 0; i < num_dx_leaves; i++) {
3935 ret = ocfs2_journal_access_dl(handle, INODE_CACHE(dir), 3944 ret = ocfs2_journal_access_dl(handle, INODE_CACHE(dir),
3936 orig_dx_leaves[i], 3945 orig_dx_leaves[i],
@@ -3939,15 +3948,14 @@ static int ocfs2_dx_dir_rebalance(struct ocfs2_super *osb, struct inode *dir,
3939 mlog_errno(ret); 3948 mlog_errno(ret);
3940 goto out_commit; 3949 goto out_commit;
3941 } 3950 }
3942 }
3943 3951
3944 cpos = split_hash; 3952 ret = ocfs2_journal_access_dl(handle, INODE_CACHE(dir),
3945 ret = ocfs2_dx_dir_new_cluster(dir, &et, cpos, handle, 3953 new_dx_leaves[i],
3946 data_ac, meta_ac, new_dx_leaves, 3954 OCFS2_JOURNAL_ACCESS_WRITE);
3947 num_dx_leaves); 3955 if (ret) {
3948 if (ret) { 3956 mlog_errno(ret);
3949 mlog_errno(ret); 3957 goto out_commit;
3950 goto out_commit; 3958 }
3951 } 3959 }
3952 3960
3953 ocfs2_dx_dir_transfer_leaf(dir, split_hash, handle, tmp_dx_leaf, 3961 ocfs2_dx_dir_transfer_leaf(dir, split_hash, handle, tmp_dx_leaf,
diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h
index 4b6ae2c13b47..b36d0bf77a5a 100644
--- a/fs/ocfs2/dlm/dlmcommon.h
+++ b/fs/ocfs2/dlm/dlmcommon.h
@@ -445,7 +445,9 @@ enum {
445 DLM_LOCK_REQUEST_MSG, /* 515 */ 445 DLM_LOCK_REQUEST_MSG, /* 515 */
446 DLM_RECO_DATA_DONE_MSG, /* 516 */ 446 DLM_RECO_DATA_DONE_MSG, /* 516 */
447 DLM_BEGIN_RECO_MSG, /* 517 */ 447 DLM_BEGIN_RECO_MSG, /* 517 */
448 DLM_FINALIZE_RECO_MSG /* 518 */ 448 DLM_FINALIZE_RECO_MSG, /* 518 */
449 DLM_QUERY_REGION, /* 519 */
450 DLM_QUERY_NODEINFO, /* 520 */
449}; 451};
450 452
451struct dlm_reco_node_data 453struct dlm_reco_node_data
@@ -727,6 +729,31 @@ struct dlm_cancel_join
727 u8 domain[O2NM_MAX_NAME_LEN]; 729 u8 domain[O2NM_MAX_NAME_LEN];
728}; 730};
729 731
/*
 * Payload of a DLM_QUERY_REGION message: the sender's list of heartbeat
 * regions for one domain.
 */
732struct dlm_query_region {
733 u8 qr_node; /* node number of the sender */
734 u8 qr_numregions; /* number of names in qr_regions; 0 when the sender uses local heartbeat */
735 u8 qr_namelen; /* length of the name stored in qr_domain */
736 u8 pad1;
737 u8 qr_domain[O2NM_MAX_NAME_LEN]; /* domain name */
738 u8 qr_regions[O2HB_MAX_REGION_NAME_LEN * O2NM_MAX_REGIONS]; /* region names, one fixed-width O2HB_MAX_REGION_NAME_LEN slot each */
739};
740
/*
 * One node's identity as carried in a DLM_QUERY_NODEINFO message.
 */
741struct dlm_node_info {
742 u8 ni_nodenum; /* node number */
743 u8 pad1;
744 u16 ni_ipv4_port; /* the node's IPv4 port; logged through ntohs(), so presumably network byte order */
745 u32 ni_ipv4_address; /* the node's IPv4 address; logged with %pI4 */
746};
747
/*
 * Payload of a DLM_QUERY_NODEINFO message: the sender's view of the
 * configured nodes for one domain.
 */
748struct dlm_query_nodeinfo {
749 u8 qn_nodenum; /* node number of the sender */
750 u8 qn_numnodes; /* number of valid entries in qn_nodes */
751 u8 qn_namelen; /* length of the name stored in qn_domain */
752 u8 pad1;
753 u8 qn_domain[O2NM_MAX_NAME_LEN]; /* domain name */
754 struct dlm_node_info qn_nodes[O2NM_MAX_NODES]; /* first qn_numnodes entries are filled in */
755};
756
730struct dlm_exit_domain 757struct dlm_exit_domain
731{ 758{
732 u8 node_idx; 759 u8 node_idx;
@@ -1030,6 +1057,7 @@ int dlm_drop_lockres_ref(struct dlm_ctxt *dlm,
1030 struct dlm_lock_resource *res); 1057 struct dlm_lock_resource *res);
1031void dlm_clean_master_list(struct dlm_ctxt *dlm, 1058void dlm_clean_master_list(struct dlm_ctxt *dlm,
1032 u8 dead_node); 1059 u8 dead_node);
1060void dlm_force_free_mles(struct dlm_ctxt *dlm);
1033int dlm_lock_basts_flushed(struct dlm_ctxt *dlm, struct dlm_lock *lock); 1061int dlm_lock_basts_flushed(struct dlm_ctxt *dlm, struct dlm_lock *lock);
1034int __dlm_lockres_has_locks(struct dlm_lock_resource *res); 1062int __dlm_lockres_has_locks(struct dlm_lock_resource *res);
1035int __dlm_lockres_unused(struct dlm_lock_resource *res); 1063int __dlm_lockres_unused(struct dlm_lock_resource *res);
diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c
index 26ff2e185b1e..272ec8631a51 100644
--- a/fs/ocfs2/dlm/dlmdebug.c
+++ b/fs/ocfs2/dlm/dlmdebug.c
@@ -636,8 +636,14 @@ static void *lockres_seq_start(struct seq_file *m, loff_t *pos)
636 spin_lock(&dlm->track_lock); 636 spin_lock(&dlm->track_lock);
637 if (oldres) 637 if (oldres)
638 track_list = &oldres->tracking; 638 track_list = &oldres->tracking;
639 else 639 else {
640 track_list = &dlm->tracking_list; 640 track_list = &dlm->tracking_list;
641 if (list_empty(track_list)) {
642 dl = NULL;
643 spin_unlock(&dlm->track_lock);
644 goto bail;
645 }
646 }
641 647
642 list_for_each_entry(res, track_list, tracking) { 648 list_for_each_entry(res, track_list, tracking) {
643 if (&res->tracking == &dlm->tracking_list) 649 if (&res->tracking == &dlm->tracking_list)
@@ -660,6 +666,7 @@ static void *lockres_seq_start(struct seq_file *m, loff_t *pos)
660 } else 666 } else
661 dl = NULL; 667 dl = NULL;
662 668
669bail:
663 /* passed to seq_show */ 670 /* passed to seq_show */
664 return dl; 671 return dl;
665} 672}
@@ -775,7 +782,9 @@ static int debug_state_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
775 782
776 /* Domain: xxxxxxxxxx Key: 0xdfbac769 */ 783 /* Domain: xxxxxxxxxx Key: 0xdfbac769 */
777 out += snprintf(db->buf + out, db->len - out, 784 out += snprintf(db->buf + out, db->len - out,
778 "Domain: %s Key: 0x%08x\n", dlm->name, dlm->key); 785 "Domain: %s Key: 0x%08x Protocol: %d.%d\n",
786 dlm->name, dlm->key, dlm->dlm_locking_proto.pv_major,
787 dlm->dlm_locking_proto.pv_minor);
779 788
780 /* Thread Pid: xxx Node: xxx State: xxxxx */ 789 /* Thread Pid: xxx Node: xxx State: xxxxx */
781 out += snprintf(db->buf + out, db->len - out, 790 out += snprintf(db->buf + out, db->len - out,
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 153abb5abef0..58a93b953735 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -128,10 +128,14 @@ static DECLARE_WAIT_QUEUE_HEAD(dlm_domain_events);
128 * will have a negotiated version with the same major number and a minor 128 * will have a negotiated version with the same major number and a minor
129 * number equal or smaller. The dlm_ctxt->dlm_locking_proto field should 129 * number equal or smaller. The dlm_ctxt->dlm_locking_proto field should
130 * be used to determine what a running domain is actually using. 130 * be used to determine what a running domain is actually using.
131 *
132 * New in version 1.1:
133 * - Message DLM_QUERY_REGION added to support global heartbeat
134 * - Message DLM_QUERY_NODEINFO added to allow online node removes
131 */ 135 */
132static const struct dlm_protocol_version dlm_protocol = { 136static const struct dlm_protocol_version dlm_protocol = {
133 .pv_major = 1, 137 .pv_major = 1,
134 .pv_minor = 0, 138 .pv_minor = 1,
135}; 139};
136 140
137#define DLM_DOMAIN_BACKOFF_MS 200 141#define DLM_DOMAIN_BACKOFF_MS 200
@@ -142,6 +146,8 @@ static int dlm_assert_joined_handler(struct o2net_msg *msg, u32 len, void *data,
142 void **ret_data); 146 void **ret_data);
143static int dlm_cancel_join_handler(struct o2net_msg *msg, u32 len, void *data, 147static int dlm_cancel_join_handler(struct o2net_msg *msg, u32 len, void *data,
144 void **ret_data); 148 void **ret_data);
149static int dlm_query_region_handler(struct o2net_msg *msg, u32 len,
150 void *data, void **ret_data);
145static int dlm_exit_domain_handler(struct o2net_msg *msg, u32 len, void *data, 151static int dlm_exit_domain_handler(struct o2net_msg *msg, u32 len, void *data,
146 void **ret_data); 152 void **ret_data);
147static int dlm_protocol_compare(struct dlm_protocol_version *existing, 153static int dlm_protocol_compare(struct dlm_protocol_version *existing,
@@ -693,6 +699,7 @@ void dlm_unregister_domain(struct dlm_ctxt *dlm)
693 699
694 dlm_mark_domain_leaving(dlm); 700 dlm_mark_domain_leaving(dlm);
695 dlm_leave_domain(dlm); 701 dlm_leave_domain(dlm);
702 dlm_force_free_mles(dlm);
696 dlm_complete_dlm_shutdown(dlm); 703 dlm_complete_dlm_shutdown(dlm);
697 } 704 }
698 dlm_put(dlm); 705 dlm_put(dlm);
@@ -920,6 +927,370 @@ static int dlm_assert_joined_handler(struct o2net_msg *msg, u32 len, void *data,
920 return 0; 927 return 0;
921} 928}
922 929
930static int dlm_match_regions(struct dlm_ctxt *dlm,
931 struct dlm_query_region *qr)
932{
933 char *local = NULL, *remote = qr->qr_regions;
934 char *l, *r;
935 int localnr, i, j, foundit;
936 int status = 0;
937
938 if (!o2hb_global_heartbeat_active()) {
939 if (qr->qr_numregions) {
940 mlog(ML_ERROR, "Domain %s: Joining node %d has global "
941 "heartbeat enabled but local node %d does not\n",
942 qr->qr_domain, qr->qr_node, dlm->node_num);
943 status = -EINVAL;
944 }
945 goto bail;
946 }
947
948 if (o2hb_global_heartbeat_active() && !qr->qr_numregions) {
949 mlog(ML_ERROR, "Domain %s: Local node %d has global "
950 "heartbeat enabled but joining node %d does not\n",
951 qr->qr_domain, dlm->node_num, qr->qr_node);
952 status = -EINVAL;
953 goto bail;
954 }
955
956 r = remote;
957 for (i = 0; i < qr->qr_numregions; ++i) {
958 mlog(0, "Region %.*s\n", O2HB_MAX_REGION_NAME_LEN, r);
959 r += O2HB_MAX_REGION_NAME_LEN;
960 }
961
962 local = kmalloc(sizeof(qr->qr_regions), GFP_KERNEL);
963 if (!local) {
964 status = -ENOMEM;
965 goto bail;
966 }
967
968 localnr = o2hb_get_all_regions(local, O2NM_MAX_REGIONS);
969
970 /* compare local regions with remote */
971 l = local;
972 for (i = 0; i < localnr; ++i) {
973 foundit = 0;
974 r = remote;
975 for (j = 0; j <= qr->qr_numregions; ++j) {
976 if (!memcmp(l, r, O2HB_MAX_REGION_NAME_LEN)) {
977 foundit = 1;
978 break;
979 }
980 r += O2HB_MAX_REGION_NAME_LEN;
981 }
982 if (!foundit) {
983 status = -EINVAL;
984 mlog(ML_ERROR, "Domain %s: Region '%.*s' registered "
985 "in local node %d but not in joining node %d\n",
986 qr->qr_domain, O2HB_MAX_REGION_NAME_LEN, l,
987 dlm->node_num, qr->qr_node);
988 goto bail;
989 }
990 l += O2HB_MAX_REGION_NAME_LEN;
991 }
992
993 /* compare remote with local regions */
994 r = remote;
995 for (i = 0; i < qr->qr_numregions; ++i) {
996 foundit = 0;
997 l = local;
998 for (j = 0; j < localnr; ++j) {
999 if (!memcmp(r, l, O2HB_MAX_REGION_NAME_LEN)) {
1000 foundit = 1;
1001 break;
1002 }
1003 l += O2HB_MAX_REGION_NAME_LEN;
1004 }
1005 if (!foundit) {
1006 status = -EINVAL;
1007 mlog(ML_ERROR, "Domain %s: Region '%.*s' registered "
1008 "in joining node %d but not in local node %d\n",
1009 qr->qr_domain, O2HB_MAX_REGION_NAME_LEN, r,
1010 qr->qr_node, dlm->node_num);
1011 goto bail;
1012 }
1013 r += O2HB_MAX_REGION_NAME_LEN;
1014 }
1015
1016bail:
1017 kfree(local);
1018
1019 return status;
1020}
1021
1022static int dlm_send_regions(struct dlm_ctxt *dlm, unsigned long *node_map)
1023{
1024 struct dlm_query_region *qr = NULL;
1025 int status, ret = 0, i;
1026 char *p;
1027
1028 if (find_next_bit(node_map, O2NM_MAX_NODES, 0) >= O2NM_MAX_NODES)
1029 goto bail;
1030
1031 qr = kzalloc(sizeof(struct dlm_query_region), GFP_KERNEL);
1032 if (!qr) {
1033 ret = -ENOMEM;
1034 mlog_errno(ret);
1035 goto bail;
1036 }
1037
1038 qr->qr_node = dlm->node_num;
1039 qr->qr_namelen = strlen(dlm->name);
1040 memcpy(qr->qr_domain, dlm->name, qr->qr_namelen);
1041 /* if local hb, the numregions will be zero */
1042 if (o2hb_global_heartbeat_active())
1043 qr->qr_numregions = o2hb_get_all_regions(qr->qr_regions,
1044 O2NM_MAX_REGIONS);
1045
1046 p = qr->qr_regions;
1047 for (i = 0; i < qr->qr_numregions; ++i, p += O2HB_MAX_REGION_NAME_LEN)
1048 mlog(0, "Region %.*s\n", O2HB_MAX_REGION_NAME_LEN, p);
1049
1050 i = -1;
1051 while ((i = find_next_bit(node_map, O2NM_MAX_NODES,
1052 i + 1)) < O2NM_MAX_NODES) {
1053 if (i == dlm->node_num)
1054 continue;
1055
1056 mlog(0, "Sending regions to node %d\n", i);
1057
1058 ret = o2net_send_message(DLM_QUERY_REGION, DLM_MOD_KEY, qr,
1059 sizeof(struct dlm_query_region),
1060 i, &status);
1061 if (ret >= 0)
1062 ret = status;
1063 if (ret) {
1064 mlog(ML_ERROR, "Region mismatch %d, node %d\n",
1065 ret, i);
1066 break;
1067 }
1068 }
1069
1070bail:
1071 kfree(qr);
1072 return ret;
1073}
1074
1075static int dlm_query_region_handler(struct o2net_msg *msg, u32 len,
1076 void *data, void **ret_data)
1077{
1078 struct dlm_query_region *qr;
1079 struct dlm_ctxt *dlm = NULL;
1080 int status = 0;
1081 int locked = 0;
1082
1083 qr = (struct dlm_query_region *) msg->buf;
1084
1085 mlog(0, "Node %u queries hb regions on domain %s\n", qr->qr_node,
1086 qr->qr_domain);
1087
1088 status = -EINVAL;
1089
1090 spin_lock(&dlm_domain_lock);
1091 dlm = __dlm_lookup_domain_full(qr->qr_domain, qr->qr_namelen);
1092 if (!dlm) {
1093 mlog(ML_ERROR, "Node %d queried hb regions on domain %s "
1094 "before join domain\n", qr->qr_node, qr->qr_domain);
1095 goto bail;
1096 }
1097
1098 spin_lock(&dlm->spinlock);
1099 locked = 1;
1100 if (dlm->joining_node != qr->qr_node) {
1101 mlog(ML_ERROR, "Node %d queried hb regions on domain %s "
1102 "but joining node is %d\n", qr->qr_node, qr->qr_domain,
1103 dlm->joining_node);
1104 goto bail;
1105 }
1106
1107 /* Support for global heartbeat was added in 1.1 */
1108 if (dlm->dlm_locking_proto.pv_major == 1 &&
1109 dlm->dlm_locking_proto.pv_minor == 0) {
1110 mlog(ML_ERROR, "Node %d queried hb regions on domain %s "
1111 "but active dlm protocol is %d.%d\n", qr->qr_node,
1112 qr->qr_domain, dlm->dlm_locking_proto.pv_major,
1113 dlm->dlm_locking_proto.pv_minor);
1114 goto bail;
1115 }
1116
1117 status = dlm_match_regions(dlm, qr);
1118
1119bail:
1120 if (locked)
1121 spin_unlock(&dlm->spinlock);
1122 spin_unlock(&dlm_domain_lock);
1123
1124 return status;
1125}
1126
1127static int dlm_match_nodes(struct dlm_ctxt *dlm, struct dlm_query_nodeinfo *qn)
1128{
1129 struct o2nm_node *local;
1130 struct dlm_node_info *remote;
1131 int i, j;
1132 int status = 0;
1133
1134 for (j = 0; j < qn->qn_numnodes; ++j)
1135 mlog(0, "Node %3d, %pI4:%u\n", qn->qn_nodes[j].ni_nodenum,
1136 &(qn->qn_nodes[j].ni_ipv4_address),
1137 ntohs(qn->qn_nodes[j].ni_ipv4_port));
1138
1139 for (i = 0; i < O2NM_MAX_NODES && !status; ++i) {
1140 local = o2nm_get_node_by_num(i);
1141 remote = NULL;
1142 for (j = 0; j < qn->qn_numnodes; ++j) {
1143 if (qn->qn_nodes[j].ni_nodenum == i) {
1144 remote = &(qn->qn_nodes[j]);
1145 break;
1146 }
1147 }
1148
1149 if (!local && !remote)
1150 continue;
1151
1152 if ((local && !remote) || (!local && remote))
1153 status = -EINVAL;
1154
1155 if (!status &&
1156 ((remote->ni_nodenum != local->nd_num) ||
1157 (remote->ni_ipv4_port != local->nd_ipv4_port) ||
1158 (remote->ni_ipv4_address != local->nd_ipv4_address)))
1159 status = -EINVAL;
1160
1161 if (status) {
1162 if (remote && !local)
1163 mlog(ML_ERROR, "Domain %s: Node %d (%pI4:%u) "
1164 "registered in joining node %d but not in "
1165 "local node %d\n", qn->qn_domain,
1166 remote->ni_nodenum,
1167 &(remote->ni_ipv4_address),
1168 ntohs(remote->ni_ipv4_port),
1169 qn->qn_nodenum, dlm->node_num);
1170 if (local && !remote)
1171 mlog(ML_ERROR, "Domain %s: Node %d (%pI4:%u) "
1172 "registered in local node %d but not in "
1173 "joining node %d\n", qn->qn_domain,
1174 local->nd_num, &(local->nd_ipv4_address),
1175 ntohs(local->nd_ipv4_port),
1176 dlm->node_num, qn->qn_nodenum);
1177 BUG_ON((!local && !remote));
1178 }
1179
1180 if (local)
1181 o2nm_node_put(local);
1182 }
1183
1184 return status;
1185}
1186
1187static int dlm_send_nodeinfo(struct dlm_ctxt *dlm, unsigned long *node_map)
1188{
1189 struct dlm_query_nodeinfo *qn = NULL;
1190 struct o2nm_node *node;
1191 int ret = 0, status, count, i;
1192
1193 if (find_next_bit(node_map, O2NM_MAX_NODES, 0) >= O2NM_MAX_NODES)
1194 goto bail;
1195
1196 qn = kzalloc(sizeof(struct dlm_query_nodeinfo), GFP_KERNEL);
1197 if (!qn) {
1198 ret = -ENOMEM;
1199 mlog_errno(ret);
1200 goto bail;
1201 }
1202
1203 for (i = 0, count = 0; i < O2NM_MAX_NODES; ++i) {
1204 node = o2nm_get_node_by_num(i);
1205 if (!node)
1206 continue;
1207 qn->qn_nodes[count].ni_nodenum = node->nd_num;
1208 qn->qn_nodes[count].ni_ipv4_port = node->nd_ipv4_port;
1209 qn->qn_nodes[count].ni_ipv4_address = node->nd_ipv4_address;
1210 mlog(0, "Node %3d, %pI4:%u\n", node->nd_num,
1211 &(node->nd_ipv4_address), ntohs(node->nd_ipv4_port));
1212 ++count;
1213 o2nm_node_put(node);
1214 }
1215
1216 qn->qn_nodenum = dlm->node_num;
1217 qn->qn_numnodes = count;
1218 qn->qn_namelen = strlen(dlm->name);
1219 memcpy(qn->qn_domain, dlm->name, qn->qn_namelen);
1220
1221 i = -1;
1222 while ((i = find_next_bit(node_map, O2NM_MAX_NODES,
1223 i + 1)) < O2NM_MAX_NODES) {
1224 if (i == dlm->node_num)
1225 continue;
1226
1227 mlog(0, "Sending nodeinfo to node %d\n", i);
1228
1229 ret = o2net_send_message(DLM_QUERY_NODEINFO, DLM_MOD_KEY,
1230 qn, sizeof(struct dlm_query_nodeinfo),
1231 i, &status);
1232 if (ret >= 0)
1233 ret = status;
1234 if (ret) {
1235 mlog(ML_ERROR, "node mismatch %d, node %d\n", ret, i);
1236 break;
1237 }
1238 }
1239
1240bail:
1241 kfree(qn);
1242 return ret;
1243}
1244
1245static int dlm_query_nodeinfo_handler(struct o2net_msg *msg, u32 len,
1246 void *data, void **ret_data)
1247{
1248 struct dlm_query_nodeinfo *qn;
1249 struct dlm_ctxt *dlm = NULL;
1250 int locked = 0, status = -EINVAL;
1251
1252 qn = (struct dlm_query_nodeinfo *) msg->buf;
1253
1254 mlog(0, "Node %u queries nodes on domain %s\n", qn->qn_nodenum,
1255 qn->qn_domain);
1256
1257 spin_lock(&dlm_domain_lock);
1258 dlm = __dlm_lookup_domain_full(qn->qn_domain, qn->qn_namelen);
1259 if (!dlm) {
1260 mlog(ML_ERROR, "Node %d queried nodes on domain %s before "
1261 "join domain\n", qn->qn_nodenum, qn->qn_domain);
1262 goto bail;
1263 }
1264
1265 spin_lock(&dlm->spinlock);
1266 locked = 1;
1267 if (dlm->joining_node != qn->qn_nodenum) {
1268 mlog(ML_ERROR, "Node %d queried nodes on domain %s but "
1269 "joining node is %d\n", qn->qn_nodenum, qn->qn_domain,
1270 dlm->joining_node);
1271 goto bail;
1272 }
1273
1274 /* Support for node query was added in 1.1 */
1275 if (dlm->dlm_locking_proto.pv_major == 1 &&
1276 dlm->dlm_locking_proto.pv_minor == 0) {
1277 mlog(ML_ERROR, "Node %d queried nodes on domain %s "
1278 "but active dlm protocol is %d.%d\n", qn->qn_nodenum,
1279 qn->qn_domain, dlm->dlm_locking_proto.pv_major,
1280 dlm->dlm_locking_proto.pv_minor);
1281 goto bail;
1282 }
1283
1284 status = dlm_match_nodes(dlm, qn);
1285
1286bail:
1287 if (locked)
1288 spin_unlock(&dlm->spinlock);
1289 spin_unlock(&dlm_domain_lock);
1290
1291 return status;
1292}
1293
923static int dlm_cancel_join_handler(struct o2net_msg *msg, u32 len, void *data, 1294static int dlm_cancel_join_handler(struct o2net_msg *msg, u32 len, void *data,
924 void **ret_data) 1295 void **ret_data)
925{ 1296{
@@ -1240,6 +1611,20 @@ static int dlm_try_to_join_domain(struct dlm_ctxt *dlm)
1240 set_bit(dlm->node_num, dlm->domain_map); 1611 set_bit(dlm->node_num, dlm->domain_map);
1241 spin_unlock(&dlm->spinlock); 1612 spin_unlock(&dlm->spinlock);
1242 1613
1614 /* Support for global heartbeat and node info was added in 1.1 */
1615 if (dlm_protocol.pv_major > 1 || dlm_protocol.pv_minor > 0) {
1616 status = dlm_send_nodeinfo(dlm, ctxt->yes_resp_map);
1617 if (status) {
1618 mlog_errno(status);
1619 goto bail;
1620 }
1621 status = dlm_send_regions(dlm, ctxt->yes_resp_map);
1622 if (status) {
1623 mlog_errno(status);
1624 goto bail;
1625 }
1626 }
1627
1243 dlm_send_join_asserts(dlm, ctxt->yes_resp_map); 1628 dlm_send_join_asserts(dlm, ctxt->yes_resp_map);
1244 1629
1245 /* Joined state *must* be set before the joining node 1630 /* Joined state *must* be set before the joining node
@@ -1806,7 +2191,21 @@ static int dlm_register_net_handlers(void)
1806 sizeof(struct dlm_cancel_join), 2191 sizeof(struct dlm_cancel_join),
1807 dlm_cancel_join_handler, 2192 dlm_cancel_join_handler,
1808 NULL, NULL, &dlm_join_handlers); 2193 NULL, NULL, &dlm_join_handlers);
2194 if (status)
2195 goto bail;
2196
2197 status = o2net_register_handler(DLM_QUERY_REGION, DLM_MOD_KEY,
2198 sizeof(struct dlm_query_region),
2199 dlm_query_region_handler,
2200 NULL, NULL, &dlm_join_handlers);
1809 2201
2202 if (status)
2203 goto bail;
2204
2205 status = o2net_register_handler(DLM_QUERY_NODEINFO, DLM_MOD_KEY,
2206 sizeof(struct dlm_query_nodeinfo),
2207 dlm_query_nodeinfo_handler,
2208 NULL, NULL, &dlm_join_handlers);
1810bail: 2209bail:
1811 if (status < 0) 2210 if (status < 0)
1812 dlm_unregister_net_handlers(); 2211 dlm_unregister_net_handlers();
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index ffb4c68dafa4..f564b0e5f80d 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -3433,3 +3433,43 @@ void dlm_lockres_release_ast(struct dlm_ctxt *dlm,
3433 wake_up(&res->wq); 3433 wake_up(&res->wq);
3434 wake_up(&dlm->migration_wq); 3434 wake_up(&dlm->migration_wq);
3435} 3435}
3436
3437void dlm_force_free_mles(struct dlm_ctxt *dlm)
3438{
3439 int i;
3440 struct hlist_head *bucket;
3441 struct dlm_master_list_entry *mle;
3442 struct hlist_node *tmp, *list;
3443
3444 /*
3445 * We notified all other nodes that we are exiting the domain and
3446 * marked the dlm state to DLM_CTXT_LEAVING. If any mles are still
3447 * around we force free them and wake any processes that are waiting
3448 * on the mles
3449 */
3450 spin_lock(&dlm->spinlock);
3451 spin_lock(&dlm->master_lock);
3452
3453 BUG_ON(dlm->dlm_state != DLM_CTXT_LEAVING);
3454 BUG_ON((find_next_bit(dlm->domain_map, O2NM_MAX_NODES, 0) < O2NM_MAX_NODES));
3455
3456 for (i = 0; i < DLM_HASH_BUCKETS; i++) {
3457 bucket = dlm_master_hash(dlm, i);
3458 hlist_for_each_safe(list, tmp, bucket) {
3459 mle = hlist_entry(list, struct dlm_master_list_entry,
3460 master_hash_node);
3461 if (mle->type != DLM_MLE_BLOCK) {
3462 mlog(ML_ERROR, "bad mle: %p\n", mle);
3463 dlm_print_one_mle(mle);
3464 }
3465 atomic_set(&mle->woken, 1);
3466 wake_up(&mle->wq);
3467
3468 __dlm_unlink_mle(dlm, mle);
3469 __dlm_mle_detach_hb_events(dlm, mle);
3470 __dlm_put_mle(mle);
3471 }
3472 }
3473 spin_unlock(&dlm->master_lock);
3474 spin_unlock(&dlm->spinlock);
3475}
diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h
index d1ce48e1b3d6..1d596d8c4a4a 100644
--- a/fs/ocfs2/dlmglue.h
+++ b/fs/ocfs2/dlmglue.h
@@ -84,6 +84,7 @@ enum {
84 OI_LS_PARENT, 84 OI_LS_PARENT,
85 OI_LS_RENAME1, 85 OI_LS_RENAME1,
86 OI_LS_RENAME2, 86 OI_LS_RENAME2,
87 OI_LS_REFLINK_TARGET,
87}; 88};
88 89
89int ocfs2_dlm_init(struct ocfs2_super *osb); 90int ocfs2_dlm_init(struct ocfs2_super *osb);
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 3064feef1430..d8408217e3bd 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -250,7 +250,7 @@ enum ocfs2_local_alloc_state
250 250
251enum ocfs2_mount_options 251enum ocfs2_mount_options
252{ 252{
253 OCFS2_MOUNT_HB_LOCAL = 1 << 0, /* Heartbeat started in local mode */ 253 OCFS2_MOUNT_HB_LOCAL = 1 << 0, /* Local heartbeat */
254 OCFS2_MOUNT_BARRIER = 1 << 1, /* Use block barriers */ 254 OCFS2_MOUNT_BARRIER = 1 << 1, /* Use block barriers */
255 OCFS2_MOUNT_NOINTR = 1 << 2, /* Don't catch signals */ 255 OCFS2_MOUNT_NOINTR = 1 << 2, /* Don't catch signals */
256 OCFS2_MOUNT_ERRORS_PANIC = 1 << 3, /* Panic on errors */ 256 OCFS2_MOUNT_ERRORS_PANIC = 1 << 3, /* Panic on errors */
@@ -263,9 +263,10 @@ enum ocfs2_mount_options
263 control lists */ 263 control lists */
264 OCFS2_MOUNT_USRQUOTA = 1 << 10, /* We support user quotas */ 264 OCFS2_MOUNT_USRQUOTA = 1 << 10, /* We support user quotas */
265 OCFS2_MOUNT_GRPQUOTA = 1 << 11, /* We support group quotas */ 265 OCFS2_MOUNT_GRPQUOTA = 1 << 11, /* We support group quotas */
266 266 OCFS2_MOUNT_COHERENCY_BUFFERED = 1 << 12, /* Allow concurrent O_DIRECT
267 OCFS2_MOUNT_COHERENCY_BUFFERED = 1 << 12 /* Allow concurrent O_DIRECT 267 writes */
268 writes */ 268 OCFS2_MOUNT_HB_NONE = 1 << 13, /* No heartbeat */
269 OCFS2_MOUNT_HB_GLOBAL = 1 << 14, /* Global heartbeat */
269}; 270};
270 271
271#define OCFS2_OSB_SOFT_RO 0x0001 272#define OCFS2_OSB_SOFT_RO 0x0001
@@ -379,6 +380,8 @@ struct ocfs2_super
379 struct ocfs2_alloc_stats alloc_stats; 380 struct ocfs2_alloc_stats alloc_stats;
380 char dev_str[20]; /* "major,minor" of the device */ 381 char dev_str[20]; /* "major,minor" of the device */
381 382
383 u8 osb_stackflags;
384
382 char osb_cluster_stack[OCFS2_STACK_LABEL_LEN + 1]; 385 char osb_cluster_stack[OCFS2_STACK_LABEL_LEN + 1];
383 struct ocfs2_cluster_connection *cconn; 386 struct ocfs2_cluster_connection *cconn;
384 struct ocfs2_lock_res osb_super_lockres; 387 struct ocfs2_lock_res osb_super_lockres;
@@ -612,10 +615,35 @@ static inline int ocfs2_is_soft_readonly(struct ocfs2_super *osb)
612 return ret; 615 return ret;
613} 616}
614 617
615static inline int ocfs2_userspace_stack(struct ocfs2_super *osb) 618static inline int ocfs2_clusterinfo_valid(struct ocfs2_super *osb)
616{ 619{
617 return (osb->s_feature_incompat & 620 return (osb->s_feature_incompat &
618 OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK); 621 (OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK |
622 OCFS2_FEATURE_INCOMPAT_CLUSTERINFO));
623}
624
625static inline int ocfs2_userspace_stack(struct ocfs2_super *osb)
626{
627 if (ocfs2_clusterinfo_valid(osb) &&
628 memcmp(osb->osb_cluster_stack, OCFS2_CLASSIC_CLUSTER_STACK,
629 OCFS2_STACK_LABEL_LEN))
630 return 1;
631 return 0;
632}
633
634static inline int ocfs2_o2cb_stack(struct ocfs2_super *osb)
635{
636 if (ocfs2_clusterinfo_valid(osb) &&
637 !memcmp(osb->osb_cluster_stack, OCFS2_CLASSIC_CLUSTER_STACK,
638 OCFS2_STACK_LABEL_LEN))
639 return 1;
640 return 0;
641}
642
643static inline int ocfs2_cluster_o2cb_global_heartbeat(struct ocfs2_super *osb)
644{
645 return ocfs2_o2cb_stack(osb) &&
646 (osb->osb_stackflags & OCFS2_CLUSTER_O2CB_GLOBAL_HEARTBEAT);
619} 647}
620 648
621static inline int ocfs2_mount_local(struct ocfs2_super *osb) 649static inline int ocfs2_mount_local(struct ocfs2_super *osb)
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index 723b20dac414..c2e4f8222e2f 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -101,7 +101,8 @@
101 | OCFS2_FEATURE_INCOMPAT_META_ECC \ 101 | OCFS2_FEATURE_INCOMPAT_META_ECC \
102 | OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS \ 102 | OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS \
103 | OCFS2_FEATURE_INCOMPAT_REFCOUNT_TREE \ 103 | OCFS2_FEATURE_INCOMPAT_REFCOUNT_TREE \
104 | OCFS2_FEATURE_INCOMPAT_DISCONTIG_BG) 104 | OCFS2_FEATURE_INCOMPAT_DISCONTIG_BG \
105 | OCFS2_FEATURE_INCOMPAT_CLUSTERINFO)
105#define OCFS2_FEATURE_RO_COMPAT_SUPP (OCFS2_FEATURE_RO_COMPAT_UNWRITTEN \ 106#define OCFS2_FEATURE_RO_COMPAT_SUPP (OCFS2_FEATURE_RO_COMPAT_UNWRITTEN \
106 | OCFS2_FEATURE_RO_COMPAT_USRQUOTA \ 107 | OCFS2_FEATURE_RO_COMPAT_USRQUOTA \
107 | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA) 108 | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)
@@ -170,6 +171,13 @@
170#define OCFS2_FEATURE_INCOMPAT_DISCONTIG_BG 0x2000 171#define OCFS2_FEATURE_INCOMPAT_DISCONTIG_BG 0x2000
171 172
172/* 173/*
174 * Incompat bit to indicate useable clusterinfo with stackflags for all
175 * cluster stacks (userspace and o2cb). If this bit is set,
176 * INCOMPAT_USERSPACE_STACK becomes superfluous and thus should not be set.
177 */
178#define OCFS2_FEATURE_INCOMPAT_CLUSTERINFO 0x4000
179
180/*
173 * backup superblock flag is used to indicate that this volume 181 * backup superblock flag is used to indicate that this volume
174 * has backup superblocks. 182 * has backup superblocks.
175 */ 183 */
@@ -235,18 +243,31 @@
235#define OCFS2_HAS_REFCOUNT_FL (0x0010) 243#define OCFS2_HAS_REFCOUNT_FL (0x0010)
236 244
237/* Inode attributes, keep in sync with EXT2 */ 245/* Inode attributes, keep in sync with EXT2 */
238#define OCFS2_SECRM_FL (0x00000001) /* Secure deletion */ 246#define OCFS2_SECRM_FL FS_SECRM_FL /* Secure deletion */
239#define OCFS2_UNRM_FL (0x00000002) /* Undelete */ 247#define OCFS2_UNRM_FL FS_UNRM_FL /* Undelete */
240#define OCFS2_COMPR_FL (0x00000004) /* Compress file */ 248#define OCFS2_COMPR_FL FS_COMPR_FL /* Compress file */
241#define OCFS2_SYNC_FL (0x00000008) /* Synchronous updates */ 249#define OCFS2_SYNC_FL FS_SYNC_FL /* Synchronous updates */
242#define OCFS2_IMMUTABLE_FL (0x00000010) /* Immutable file */ 250#define OCFS2_IMMUTABLE_FL FS_IMMUTABLE_FL /* Immutable file */
243#define OCFS2_APPEND_FL (0x00000020) /* writes to file may only append */ 251#define OCFS2_APPEND_FL FS_APPEND_FL /* writes to file may only append */
244#define OCFS2_NODUMP_FL (0x00000040) /* do not dump file */ 252#define OCFS2_NODUMP_FL FS_NODUMP_FL /* do not dump file */
245#define OCFS2_NOATIME_FL (0x00000080) /* do not update atime */ 253#define OCFS2_NOATIME_FL FS_NOATIME_FL /* do not update atime */
246#define OCFS2_DIRSYNC_FL (0x00010000) /* dirsync behaviour (directories only) */ 254/* Reserved for compression usage... */
247 255#define OCFS2_DIRTY_FL FS_DIRTY_FL
248#define OCFS2_FL_VISIBLE (0x000100FF) /* User visible flags */ 256#define OCFS2_COMPRBLK_FL FS_COMPRBLK_FL /* One or more compressed clusters */
249#define OCFS2_FL_MODIFIABLE (0x000100FF) /* User modifiable flags */ 257#define OCFS2_NOCOMP_FL FS_NOCOMP_FL /* Don't compress */
258#define OCFS2_ECOMPR_FL FS_ECOMPR_FL /* Compression error */
259/* End compression flags --- maybe not all used */
260#define OCFS2_BTREE_FL FS_BTREE_FL /* btree format dir */
261#define OCFS2_INDEX_FL FS_INDEX_FL /* hash-indexed directory */
262#define OCFS2_IMAGIC_FL FS_IMAGIC_FL /* AFS directory */
263#define OCFS2_JOURNAL_DATA_FL FS_JOURNAL_DATA_FL /* Reserved for ext3 */
264#define OCFS2_NOTAIL_FL FS_NOTAIL_FL /* file tail should not be merged */
265#define OCFS2_DIRSYNC_FL FS_DIRSYNC_FL /* dirsync behaviour (directories only) */
266#define OCFS2_TOPDIR_FL FS_TOPDIR_FL /* Top of directory hierarchies*/
267#define OCFS2_RESERVED_FL FS_RESERVED_FL /* reserved for ext2 lib */
268
269#define OCFS2_FL_VISIBLE FS_FL_USER_VISIBLE /* User visible flags */
270#define OCFS2_FL_MODIFIABLE FS_FL_USER_MODIFIABLE /* User modifiable flags */
250 271
251/* 272/*
252 * Extent record flags (e_node.leaf.flags) 273 * Extent record flags (e_node.leaf.flags)
@@ -279,10 +300,13 @@
279#define OCFS2_VOL_UUID_LEN 16 300#define OCFS2_VOL_UUID_LEN 16
280#define OCFS2_MAX_VOL_LABEL_LEN 64 301#define OCFS2_MAX_VOL_LABEL_LEN 64
281 302
282/* The alternate, userspace stack fields */ 303/* The cluster stack fields */
283#define OCFS2_STACK_LABEL_LEN 4 304#define OCFS2_STACK_LABEL_LEN 4
284#define OCFS2_CLUSTER_NAME_LEN 16 305#define OCFS2_CLUSTER_NAME_LEN 16
285 306
307/* Classic (historically speaking) cluster stack */
308#define OCFS2_CLASSIC_CLUSTER_STACK "o2cb"
309
286/* Journal limits (in bytes) */ 310/* Journal limits (in bytes) */
287#define OCFS2_MIN_JOURNAL_SIZE (4 * 1024 * 1024) 311#define OCFS2_MIN_JOURNAL_SIZE (4 * 1024 * 1024)
288 312
@@ -292,6 +316,11 @@
292 */ 316 */
293#define OCFS2_MIN_XATTR_INLINE_SIZE 256 317#define OCFS2_MIN_XATTR_INLINE_SIZE 256
294 318
319/*
320 * Cluster info flags (ocfs2_cluster_info.ci_stackflags)
321 */
322#define OCFS2_CLUSTER_O2CB_GLOBAL_HEARTBEAT (0x01)
323
295struct ocfs2_system_inode_info { 324struct ocfs2_system_inode_info {
296 char *si_name; 325 char *si_name;
297 int si_iflags; 326 int si_iflags;
@@ -352,6 +381,7 @@ static struct ocfs2_system_inode_info ocfs2_system_inodes[NUM_SYSTEM_INODES] = {
352/* Parameter passed from mount.ocfs2 to module */ 381/* Parameter passed from mount.ocfs2 to module */
353#define OCFS2_HB_NONE "heartbeat=none" 382#define OCFS2_HB_NONE "heartbeat=none"
354#define OCFS2_HB_LOCAL "heartbeat=local" 383#define OCFS2_HB_LOCAL "heartbeat=local"
384#define OCFS2_HB_GLOBAL "heartbeat=global"
355 385
356/* 386/*
357 * OCFS2 directory file types. Only the low 3 bits are used. The 387 * OCFS2 directory file types. Only the low 3 bits are used. The
@@ -558,9 +588,21 @@ struct ocfs2_slot_map_extended {
558 */ 588 */
559}; 589};
560 590
591/*
592 * ci_stackflags is only valid if the incompat bit
593 * OCFS2_FEATURE_INCOMPAT_CLUSTERINFO is set.
594 */
561struct ocfs2_cluster_info { 595struct ocfs2_cluster_info {
562/*00*/ __u8 ci_stack[OCFS2_STACK_LABEL_LEN]; 596/*00*/ __u8 ci_stack[OCFS2_STACK_LABEL_LEN];
563 __le32 ci_reserved; 597 union {
598 __le32 ci_reserved;
599 struct {
600 __u8 ci_stackflags;
601 __u8 ci_reserved1;
602 __u8 ci_reserved2;
603 __u8 ci_reserved3;
604 };
605 };
564/*08*/ __u8 ci_cluster[OCFS2_CLUSTER_NAME_LEN]; 606/*08*/ __u8 ci_cluster[OCFS2_CLUSTER_NAME_LEN];
565/*18*/ 607/*18*/
566}; 608};
@@ -597,9 +639,9 @@ struct ocfs2_super_block {
597 * group header */ 639 * group header */
598/*50*/ __u8 s_label[OCFS2_MAX_VOL_LABEL_LEN]; /* Label for mounting, etc. */ 640/*50*/ __u8 s_label[OCFS2_MAX_VOL_LABEL_LEN]; /* Label for mounting, etc. */
599/*90*/ __u8 s_uuid[OCFS2_VOL_UUID_LEN]; /* 128-bit uuid */ 641/*90*/ __u8 s_uuid[OCFS2_VOL_UUID_LEN]; /* 128-bit uuid */
600/*A0*/ struct ocfs2_cluster_info s_cluster_info; /* Selected userspace 642/*A0*/ struct ocfs2_cluster_info s_cluster_info; /* Only valid if either
601 stack. Only valid 643 userspace or clusterinfo
602 with INCOMPAT flag. */ 644 INCOMPAT flag set. */
603/*B8*/ __le16 s_xattr_inline_size; /* extended attribute inline size 645/*B8*/ __le16 s_xattr_inline_size; /* extended attribute inline size
604 for this fs*/ 646 for this fs*/
605 __le16 s_reserved0; 647 __le16 s_reserved0;
diff --git a/fs/ocfs2/ocfs2_ioctl.h b/fs/ocfs2/ocfs2_ioctl.h
index 9bc535499868..b46f39bf7438 100644
--- a/fs/ocfs2/ocfs2_ioctl.h
+++ b/fs/ocfs2/ocfs2_ioctl.h
@@ -23,10 +23,10 @@
23/* 23/*
24 * ioctl commands 24 * ioctl commands
25 */ 25 */
26#define OCFS2_IOC_GETFLAGS _IOR('f', 1, long) 26#define OCFS2_IOC_GETFLAGS FS_IOC_GETFLAGS
27#define OCFS2_IOC_SETFLAGS _IOW('f', 2, long) 27#define OCFS2_IOC_SETFLAGS FS_IOC_SETFLAGS
28#define OCFS2_IOC32_GETFLAGS _IOR('f', 1, int) 28#define OCFS2_IOC32_GETFLAGS FS_IOC32_GETFLAGS
29#define OCFS2_IOC32_SETFLAGS _IOW('f', 2, int) 29#define OCFS2_IOC32_SETFLAGS FS_IOC32_SETFLAGS
30 30
31/* 31/*
32 * Space reservation / allocation / free ioctls and argument structure 32 * Space reservation / allocation / free ioctls and argument structure
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index a120cfcf69bf..b5f9160e93e9 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -4240,8 +4240,9 @@ static int __ocfs2_reflink(struct dentry *old_dentry,
4240 goto out; 4240 goto out;
4241 } 4241 }
4242 4242
4243 mutex_lock(&new_inode->i_mutex); 4243 mutex_lock_nested(&new_inode->i_mutex, I_MUTEX_CHILD);
4244 ret = ocfs2_inode_lock(new_inode, &new_bh, 1); 4244 ret = ocfs2_inode_lock_nested(new_inode, &new_bh, 1,
4245 OI_LS_REFLINK_TARGET);
4245 if (ret) { 4246 if (ret) {
4246 mlog_errno(ret); 4247 mlog_errno(ret);
4247 goto out_unlock; 4248 goto out_unlock;
diff --git a/fs/ocfs2/reservations.c b/fs/ocfs2/reservations.c
index d8b6e4259b80..3e78db361bc7 100644
--- a/fs/ocfs2/reservations.c
+++ b/fs/ocfs2/reservations.c
@@ -732,25 +732,23 @@ int ocfs2_resmap_resv_bits(struct ocfs2_reservation_map *resmap,
732 struct ocfs2_alloc_reservation *resv, 732 struct ocfs2_alloc_reservation *resv,
733 int *cstart, int *clen) 733 int *cstart, int *clen)
734{ 734{
735 unsigned int wanted = *clen;
736
737 if (resv == NULL || ocfs2_resmap_disabled(resmap)) 735 if (resv == NULL || ocfs2_resmap_disabled(resmap))
738 return -ENOSPC; 736 return -ENOSPC;
739 737
740 spin_lock(&resv_lock); 738 spin_lock(&resv_lock);
741 739
742 /*
743 * We don't want to over-allocate for temporary
744 * windows. Otherwise, we run the risk of fragmenting the
745 * allocation space.
746 */
747 wanted = ocfs2_resv_window_bits(resmap, resv);
748 if ((resv->r_flags & OCFS2_RESV_FLAG_TMP) || wanted < *clen)
749 wanted = *clen;
750
751 if (ocfs2_resv_empty(resv)) { 740 if (ocfs2_resv_empty(resv)) {
752 mlog(0, "empty reservation, find new window\n"); 741 /*
742 * We don't want to over-allocate for temporary
743 * windows. Otherwise, we run the risk of fragmenting the
744 * allocation space.
745 */
746 unsigned int wanted = ocfs2_resv_window_bits(resmap, resv);
753 747
748 if ((resv->r_flags & OCFS2_RESV_FLAG_TMP) || wanted < *clen)
749 wanted = *clen;
750
751 mlog(0, "empty reservation, find new window\n");
754 /* 752 /*
755 * Try to get a window here. If it works, we must fall 753 * Try to get a window here. If it works, we must fall
756 * through and test the bitmap . This avoids some 754 * through and test the bitmap . This avoids some
diff --git a/fs/ocfs2/stack_o2cb.c b/fs/ocfs2/stack_o2cb.c
index 0d3049f696c5..19965b00c43c 100644
--- a/fs/ocfs2/stack_o2cb.c
+++ b/fs/ocfs2/stack_o2cb.c
@@ -283,6 +283,8 @@ static int o2cb_cluster_connect(struct ocfs2_cluster_connection *conn)
283 /* for now we only have one cluster/node, make sure we see it 283 /* for now we only have one cluster/node, make sure we see it
284 * in the heartbeat universe */ 284 * in the heartbeat universe */
285 if (!o2hb_check_local_node_heartbeating()) { 285 if (!o2hb_check_local_node_heartbeating()) {
286 if (o2hb_global_heartbeat_active())
287 mlog(ML_ERROR, "Global heartbeat not started\n");
286 rc = -EINVAL; 288 rc = -EINVAL;
287 goto out; 289 goto out;
288 } 290 }
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index 64f2c50a1c37..5fed60de7630 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -357,7 +357,7 @@ out:
357static void ocfs2_bg_discontig_add_extent(struct ocfs2_super *osb, 357static void ocfs2_bg_discontig_add_extent(struct ocfs2_super *osb,
358 struct ocfs2_group_desc *bg, 358 struct ocfs2_group_desc *bg,
359 struct ocfs2_chain_list *cl, 359 struct ocfs2_chain_list *cl,
360 u64 p_blkno, u32 clusters) 360 u64 p_blkno, unsigned int clusters)
361{ 361{
362 struct ocfs2_extent_list *el = &bg->bg_list; 362 struct ocfs2_extent_list *el = &bg->bg_list;
363 struct ocfs2_extent_rec *rec; 363 struct ocfs2_extent_rec *rec;
@@ -369,7 +369,7 @@ static void ocfs2_bg_discontig_add_extent(struct ocfs2_super *osb,
369 rec->e_blkno = cpu_to_le64(p_blkno); 369 rec->e_blkno = cpu_to_le64(p_blkno);
370 rec->e_cpos = cpu_to_le32(le16_to_cpu(bg->bg_bits) / 370 rec->e_cpos = cpu_to_le32(le16_to_cpu(bg->bg_bits) /
371 le16_to_cpu(cl->cl_bpc)); 371 le16_to_cpu(cl->cl_bpc));
372 rec->e_leaf_clusters = cpu_to_le32(clusters); 372 rec->e_leaf_clusters = cpu_to_le16(clusters);
373 le16_add_cpu(&bg->bg_bits, clusters * le16_to_cpu(cl->cl_bpc)); 373 le16_add_cpu(&bg->bg_bits, clusters * le16_to_cpu(cl->cl_bpc));
374 le16_add_cpu(&bg->bg_free_bits_count, 374 le16_add_cpu(&bg->bg_free_bits_count,
375 clusters * le16_to_cpu(cl->cl_bpc)); 375 clusters * le16_to_cpu(cl->cl_bpc));
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 9122d59f8127..a8a0ca44f88f 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -162,6 +162,7 @@ enum {
162 Opt_nointr, 162 Opt_nointr,
163 Opt_hb_none, 163 Opt_hb_none,
164 Opt_hb_local, 164 Opt_hb_local,
165 Opt_hb_global,
165 Opt_data_ordered, 166 Opt_data_ordered,
166 Opt_data_writeback, 167 Opt_data_writeback,
167 Opt_atime_quantum, 168 Opt_atime_quantum,
@@ -192,6 +193,7 @@ static const match_table_t tokens = {
192 {Opt_nointr, "nointr"}, 193 {Opt_nointr, "nointr"},
193 {Opt_hb_none, OCFS2_HB_NONE}, 194 {Opt_hb_none, OCFS2_HB_NONE},
194 {Opt_hb_local, OCFS2_HB_LOCAL}, 195 {Opt_hb_local, OCFS2_HB_LOCAL},
196 {Opt_hb_global, OCFS2_HB_GLOBAL},
195 {Opt_data_ordered, "data=ordered"}, 197 {Opt_data_ordered, "data=ordered"},
196 {Opt_data_writeback, "data=writeback"}, 198 {Opt_data_writeback, "data=writeback"},
197 {Opt_atime_quantum, "atime_quantum=%u"}, 199 {Opt_atime_quantum, "atime_quantum=%u"},
@@ -626,6 +628,7 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data)
626 int ret = 0; 628 int ret = 0;
627 struct mount_options parsed_options; 629 struct mount_options parsed_options;
628 struct ocfs2_super *osb = OCFS2_SB(sb); 630 struct ocfs2_super *osb = OCFS2_SB(sb);
631 u32 tmp;
629 632
630 lock_kernel(); 633 lock_kernel();
631 634
@@ -635,8 +638,9 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data)
635 goto out; 638 goto out;
636 } 639 }
637 640
638 if ((osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL) != 641 tmp = OCFS2_MOUNT_HB_LOCAL | OCFS2_MOUNT_HB_GLOBAL |
639 (parsed_options.mount_opt & OCFS2_MOUNT_HB_LOCAL)) { 642 OCFS2_MOUNT_HB_NONE;
643 if ((osb->s_mount_opt & tmp) != (parsed_options.mount_opt & tmp)) {
640 ret = -EINVAL; 644 ret = -EINVAL;
641 mlog(ML_ERROR, "Cannot change heartbeat mode on remount\n"); 645 mlog(ML_ERROR, "Cannot change heartbeat mode on remount\n");
642 goto out; 646 goto out;
@@ -827,23 +831,29 @@ bail:
827 831
828static int ocfs2_verify_heartbeat(struct ocfs2_super *osb) 832static int ocfs2_verify_heartbeat(struct ocfs2_super *osb)
829{ 833{
830 if (ocfs2_mount_local(osb)) { 834 u32 hb_enabled = OCFS2_MOUNT_HB_LOCAL | OCFS2_MOUNT_HB_GLOBAL;
831 if (osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL) { 835
836 if (osb->s_mount_opt & hb_enabled) {
837 if (ocfs2_mount_local(osb)) {
832 mlog(ML_ERROR, "Cannot heartbeat on a locally " 838 mlog(ML_ERROR, "Cannot heartbeat on a locally "
833 "mounted device.\n"); 839 "mounted device.\n");
834 return -EINVAL; 840 return -EINVAL;
835 } 841 }
836 } 842 if (ocfs2_userspace_stack(osb)) {
837
838 if (ocfs2_userspace_stack(osb)) {
839 if (osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL) {
840 mlog(ML_ERROR, "Userspace stack expected, but " 843 mlog(ML_ERROR, "Userspace stack expected, but "
841 "o2cb heartbeat arguments passed to mount\n"); 844 "o2cb heartbeat arguments passed to mount\n");
842 return -EINVAL; 845 return -EINVAL;
843 } 846 }
847 if (((osb->s_mount_opt & OCFS2_MOUNT_HB_GLOBAL) &&
848 !ocfs2_cluster_o2cb_global_heartbeat(osb)) ||
849 ((osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL) &&
850 ocfs2_cluster_o2cb_global_heartbeat(osb))) {
851 mlog(ML_ERROR, "Mismatching o2cb heartbeat modes\n");
852 return -EINVAL;
853 }
844 } 854 }
845 855
846 if (!(osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL)) { 856 if (!(osb->s_mount_opt & hb_enabled)) {
847 if (!ocfs2_mount_local(osb) && !ocfs2_is_hard_readonly(osb) && 857 if (!ocfs2_mount_local(osb) && !ocfs2_is_hard_readonly(osb) &&
848 !ocfs2_userspace_stack(osb)) { 858 !ocfs2_userspace_stack(osb)) {
849 mlog(ML_ERROR, "Heartbeat has to be started to mount " 859 mlog(ML_ERROR, "Heartbeat has to be started to mount "
@@ -1309,6 +1319,7 @@ static int ocfs2_parse_options(struct super_block *sb,
1309{ 1319{
1310 int status; 1320 int status;
1311 char *p; 1321 char *p;
1322 u32 tmp;
1312 1323
1313 mlog_entry("remount: %d, options: \"%s\"\n", is_remount, 1324 mlog_entry("remount: %d, options: \"%s\"\n", is_remount,
1314 options ? options : "(none)"); 1325 options ? options : "(none)");
@@ -1340,7 +1351,10 @@ static int ocfs2_parse_options(struct super_block *sb,
1340 mopt->mount_opt |= OCFS2_MOUNT_HB_LOCAL; 1351 mopt->mount_opt |= OCFS2_MOUNT_HB_LOCAL;
1341 break; 1352 break;
1342 case Opt_hb_none: 1353 case Opt_hb_none:
1343 mopt->mount_opt &= ~OCFS2_MOUNT_HB_LOCAL; 1354 mopt->mount_opt |= OCFS2_MOUNT_HB_NONE;
1355 break;
1356 case Opt_hb_global:
1357 mopt->mount_opt |= OCFS2_MOUNT_HB_GLOBAL;
1344 break; 1358 break;
1345 case Opt_barrier: 1359 case Opt_barrier:
1346 if (match_int(&args[0], &option)) { 1360 if (match_int(&args[0], &option)) {
@@ -1501,6 +1515,15 @@ static int ocfs2_parse_options(struct super_block *sb,
1501 } 1515 }
1502 } 1516 }
1503 1517
1518 /* Ensure only one heartbeat mode */
1519 tmp = mopt->mount_opt & (OCFS2_MOUNT_HB_LOCAL | OCFS2_MOUNT_HB_GLOBAL |
1520 OCFS2_MOUNT_HB_NONE);
1521 if (hweight32(tmp) != 1) {
1522 mlog(ML_ERROR, "Invalid heartbeat mount options\n");
1523 status = 0;
1524 goto bail;
1525 }
1526
1504 status = 1; 1527 status = 1;
1505 1528
1506bail: 1529bail:
@@ -1514,10 +1537,14 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
1514 unsigned long opts = osb->s_mount_opt; 1537 unsigned long opts = osb->s_mount_opt;
1515 unsigned int local_alloc_megs; 1538 unsigned int local_alloc_megs;
1516 1539
1517 if (opts & OCFS2_MOUNT_HB_LOCAL) 1540 if (opts & (OCFS2_MOUNT_HB_LOCAL | OCFS2_MOUNT_HB_GLOBAL)) {
1518 seq_printf(s, ",_netdev,heartbeat=local"); 1541 seq_printf(s, ",_netdev");
1519 else 1542 if (opts & OCFS2_MOUNT_HB_LOCAL)
1520 seq_printf(s, ",heartbeat=none"); 1543 seq_printf(s, ",%s", OCFS2_HB_LOCAL);
1544 else
1545 seq_printf(s, ",%s", OCFS2_HB_GLOBAL);
1546 } else
1547 seq_printf(s, ",%s", OCFS2_HB_NONE);
1521 1548
1522 if (opts & OCFS2_MOUNT_NOINTR) 1549 if (opts & OCFS2_MOUNT_NOINTR)
1523 seq_printf(s, ",nointr"); 1550 seq_printf(s, ",nointr");
@@ -2209,7 +2236,9 @@ static int ocfs2_initialize_super(struct super_block *sb,
2209 goto bail; 2236 goto bail;
2210 } 2237 }
2211 2238
2212 if (ocfs2_userspace_stack(osb)) { 2239 if (ocfs2_clusterinfo_valid(osb)) {
2240 osb->osb_stackflags =
2241 OCFS2_RAW_SB(di)->s_cluster_info.ci_stackflags;
2213 memcpy(osb->osb_cluster_stack, 2242 memcpy(osb->osb_cluster_stack,
2214 OCFS2_RAW_SB(di)->s_cluster_info.ci_stack, 2243 OCFS2_RAW_SB(di)->s_cluster_info.ci_stack,
2215 OCFS2_STACK_LABEL_LEN); 2244 OCFS2_STACK_LABEL_LEN);
diff --git a/fs/ocfs2/symlink.c b/fs/ocfs2/symlink.c
index 32499d213fc4..9975457c981f 100644
--- a/fs/ocfs2/symlink.c
+++ b/fs/ocfs2/symlink.c
@@ -128,7 +128,7 @@ static void *ocfs2_fast_follow_link(struct dentry *dentry,
128 } 128 }
129 129
130 /* Fast symlinks can't be large */ 130 /* Fast symlinks can't be large */
131 len = strlen(target); 131 len = strnlen(target, ocfs2_fast_symlink_chars(inode->i_sb));
132 link = kzalloc(len + 1, GFP_NOFS); 132 link = kzalloc(len + 1, GFP_NOFS);
133 if (!link) { 133 if (!link) {
134 status = -ENOMEM; 134 status = -ENOMEM;
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index d03469f61801..06fa5e77c40e 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -1286,13 +1286,11 @@ int ocfs2_xattr_get_nolock(struct inode *inode,
1286 xis.inode_bh = xbs.inode_bh = di_bh; 1286 xis.inode_bh = xbs.inode_bh = di_bh;
1287 di = (struct ocfs2_dinode *)di_bh->b_data; 1287 di = (struct ocfs2_dinode *)di_bh->b_data;
1288 1288
1289 down_read(&oi->ip_xattr_sem);
1290 ret = ocfs2_xattr_ibody_get(inode, name_index, name, buffer, 1289 ret = ocfs2_xattr_ibody_get(inode, name_index, name, buffer,
1291 buffer_size, &xis); 1290 buffer_size, &xis);
1292 if (ret == -ENODATA && di->i_xattr_loc) 1291 if (ret == -ENODATA && di->i_xattr_loc)
1293 ret = ocfs2_xattr_block_get(inode, name_index, name, buffer, 1292 ret = ocfs2_xattr_block_get(inode, name_index, name, buffer,
1294 buffer_size, &xbs); 1293 buffer_size, &xbs);
1295 up_read(&oi->ip_xattr_sem);
1296 1294
1297 return ret; 1295 return ret;
1298} 1296}
@@ -1316,8 +1314,10 @@ static int ocfs2_xattr_get(struct inode *inode,
1316 mlog_errno(ret); 1314 mlog_errno(ret);
1317 return ret; 1315 return ret;
1318 } 1316 }
1317 down_read(&OCFS2_I(inode)->ip_xattr_sem);
1319 ret = ocfs2_xattr_get_nolock(inode, di_bh, name_index, 1318 ret = ocfs2_xattr_get_nolock(inode, di_bh, name_index,
1320 name, buffer, buffer_size); 1319 name, buffer, buffer_size);
1320 up_read(&OCFS2_I(inode)->ip_xattr_sem);
1321 1321
1322 ocfs2_inode_unlock(inode, 0); 1322 ocfs2_inode_unlock(inode, 0);
1323 1323
diff --git a/fs/proc/base.c b/fs/proc/base.c
index a1c43e7c8a7b..8e4addaa5424 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2675,7 +2675,7 @@ static const struct pid_entry tgid_base_stuff[] = {
2675 INF("auxv", S_IRUSR, proc_pid_auxv), 2675 INF("auxv", S_IRUSR, proc_pid_auxv),
2676 ONE("status", S_IRUGO, proc_pid_status), 2676 ONE("status", S_IRUGO, proc_pid_status),
2677 ONE("personality", S_IRUSR, proc_pid_personality), 2677 ONE("personality", S_IRUSR, proc_pid_personality),
2678 INF("limits", S_IRUSR, proc_pid_limits), 2678 INF("limits", S_IRUGO, proc_pid_limits),
2679#ifdef CONFIG_SCHED_DEBUG 2679#ifdef CONFIG_SCHED_DEBUG
2680 REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), 2680 REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations),
2681#endif 2681#endif
@@ -3011,7 +3011,7 @@ static const struct pid_entry tid_base_stuff[] = {
3011 INF("auxv", S_IRUSR, proc_pid_auxv), 3011 INF("auxv", S_IRUSR, proc_pid_auxv),
3012 ONE("status", S_IRUGO, proc_pid_status), 3012 ONE("status", S_IRUGO, proc_pid_status),
3013 ONE("personality", S_IRUSR, proc_pid_personality), 3013 ONE("personality", S_IRUSR, proc_pid_personality),
3014 INF("limits", S_IRUSR, proc_pid_limits), 3014 INF("limits", S_IRUGO, proc_pid_limits),
3015#ifdef CONFIG_SCHED_DEBUG 3015#ifdef CONFIG_SCHED_DEBUG
3016 REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), 3016 REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations),
3017#endif 3017#endif
diff --git a/fs/proc/page.c b/fs/proc/page.c
index 180cf5a0bd67..3b8b45660331 100644
--- a/fs/proc/page.c
+++ b/fs/proc/page.c
@@ -146,7 +146,7 @@ u64 stable_page_flags(struct page *page)
146 u |= kpf_copy_bit(k, KPF_HWPOISON, PG_hwpoison); 146 u |= kpf_copy_bit(k, KPF_HWPOISON, PG_hwpoison);
147#endif 147#endif
148 148
149#ifdef CONFIG_IA64_UNCACHED_ALLOCATOR 149#ifdef CONFIG_ARCH_USES_PG_UNCACHED
150 u |= kpf_copy_bit(k, KPF_UNCACHED, PG_uncached); 150 u |= kpf_copy_bit(k, KPF_UNCACHED, PG_uncached);
151#endif 151#endif
152 152
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 271afc48b9a5..1dbca4e8cc16 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -363,13 +363,13 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
363 mss->referenced += PAGE_SIZE; 363 mss->referenced += PAGE_SIZE;
364 mapcount = page_mapcount(page); 364 mapcount = page_mapcount(page);
365 if (mapcount >= 2) { 365 if (mapcount >= 2) {
366 if (pte_dirty(ptent)) 366 if (pte_dirty(ptent) || PageDirty(page))
367 mss->shared_dirty += PAGE_SIZE; 367 mss->shared_dirty += PAGE_SIZE;
368 else 368 else
369 mss->shared_clean += PAGE_SIZE; 369 mss->shared_clean += PAGE_SIZE;
370 mss->pss += (PAGE_SIZE << PSS_SHIFT) / mapcount; 370 mss->pss += (PAGE_SIZE << PSS_SHIFT) / mapcount;
371 } else { 371 } else {
372 if (pte_dirty(ptent)) 372 if (pte_dirty(ptent) || PageDirty(page))
373 mss->private_dirty += PAGE_SIZE; 373 mss->private_dirty += PAGE_SIZE;
374 else 374 else
375 mss->private_clean += PAGE_SIZE; 375 mss->private_clean += PAGE_SIZE;
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index 91c817ff02c3..2367fb3f70bc 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -163,7 +163,7 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer,
163 163
164static const struct file_operations proc_vmcore_operations = { 164static const struct file_operations proc_vmcore_operations = {
165 .read = read_vmcore, 165 .read = read_vmcore,
166 .llseek = generic_file_llseek, 166 .llseek = default_llseek,
167}; 167};
168 168
169static struct vmcore* __init get_new_element(void) 169static struct vmcore* __init get_new_element(void)
diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c
index f53505de0712..5cbb81e134ac 100644
--- a/fs/reiserfs/ioctl.c
+++ b/fs/reiserfs/ioctl.c
@@ -170,6 +170,7 @@ int reiserfs_prepare_write(struct file *f, struct page *page,
170int reiserfs_unpack(struct inode *inode, struct file *filp) 170int reiserfs_unpack(struct inode *inode, struct file *filp)
171{ 171{
172 int retval = 0; 172 int retval = 0;
173 int depth;
173 int index; 174 int index;
174 struct page *page; 175 struct page *page;
175 struct address_space *mapping; 176 struct address_space *mapping;
@@ -188,8 +189,8 @@ int reiserfs_unpack(struct inode *inode, struct file *filp)
188 /* we need to make sure nobody is changing the file size beneath 189 /* we need to make sure nobody is changing the file size beneath
189 ** us 190 ** us
190 */ 191 */
191 mutex_lock(&inode->i_mutex); 192 reiserfs_mutex_lock_safe(&inode->i_mutex, inode->i_sb);
192 reiserfs_write_lock(inode->i_sb); 193 depth = reiserfs_write_lock_once(inode->i_sb);
193 194
194 write_from = inode->i_size & (blocksize - 1); 195 write_from = inode->i_size & (blocksize - 1);
195 /* if we are on a block boundary, we are already unpacked. */ 196 /* if we are on a block boundary, we are already unpacked. */
@@ -224,6 +225,6 @@ int reiserfs_unpack(struct inode *inode, struct file *filp)
224 225
225 out: 226 out:
226 mutex_unlock(&inode->i_mutex); 227 mutex_unlock(&inode->i_mutex);
227 reiserfs_write_unlock(inode->i_sb); 228 reiserfs_write_unlock_once(inode->i_sb, depth);
228 return retval; 229 return retval;
229} 230}
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index d72cf2bb054a..286e36e21dae 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -1932,7 +1932,8 @@ xfs_buf_init(void)
1932 if (!xfs_buf_zone) 1932 if (!xfs_buf_zone)
1933 goto out; 1933 goto out;
1934 1934
1935 xfslogd_workqueue = create_workqueue("xfslogd"); 1935 xfslogd_workqueue = alloc_workqueue("xfslogd",
1936 WQ_RESCUER | WQ_HIGHPRI, 1);
1936 if (!xfslogd_workqueue) 1937 if (!xfslogd_workqueue)
1937 goto out_free_buf_zone; 1938 goto out_free_buf_zone;
1938 1939
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index 4fec427b83ef..3b9e626f7cd1 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -785,6 +785,8 @@ xfs_ioc_fsgetxattr(
785{ 785{
786 struct fsxattr fa; 786 struct fsxattr fa;
787 787
788 memset(&fa, 0, sizeof(struct fsxattr));
789
788 xfs_ilock(ip, XFS_ILOCK_SHARED); 790 xfs_ilock(ip, XFS_ILOCK_SHARED);
789 fa.fsx_xflags = xfs_ip2xflags(ip); 791 fa.fsx_xflags = xfs_ip2xflags(ip);
790 fa.fsx_extsize = ip->i_d.di_extsize << ip->i_mount->m_sb.sb_blocklog; 792 fa.fsx_extsize = ip->i_d.di_extsize << ip->i_mount->m_sb.sb_blocklog;
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index ed575fb4b495..7e206fc1fa36 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -405,9 +405,15 @@ xlog_cil_push(
405 new_ctx = kmem_zalloc(sizeof(*new_ctx), KM_SLEEP|KM_NOFS); 405 new_ctx = kmem_zalloc(sizeof(*new_ctx), KM_SLEEP|KM_NOFS);
406 new_ctx->ticket = xlog_cil_ticket_alloc(log); 406 new_ctx->ticket = xlog_cil_ticket_alloc(log);
407 407
408 /* lock out transaction commit, but don't block on background push */ 408 /*
409 * Lock out transaction commit, but don't block for background pushes
410 * unless we are well over the CIL space limit. See the definition of
411 * XLOG_CIL_HARD_SPACE_LIMIT() for the full explanation of the logic
412 * used here.
413 */
409 if (!down_write_trylock(&cil->xc_ctx_lock)) { 414 if (!down_write_trylock(&cil->xc_ctx_lock)) {
410 if (!push_seq) 415 if (!push_seq &&
416 cil->xc_ctx->space_used < XLOG_CIL_HARD_SPACE_LIMIT(log))
411 goto out_free_ticket; 417 goto out_free_ticket;
412 down_write(&cil->xc_ctx_lock); 418 down_write(&cil->xc_ctx_lock);
413 } 419 }
@@ -422,7 +428,7 @@ xlog_cil_push(
422 goto out_skip; 428 goto out_skip;
423 429
424 /* check for a previously pushed sequence */ 430 /* check for a previously pushed sequence */
425 if (push_seq < cil->xc_ctx->sequence) 431 if (push_seq && push_seq < cil->xc_ctx->sequence)
426 goto out_skip; 432 goto out_skip;
427 433
428 /* 434 /*
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index ced52b98b322..edcdfe01617f 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -426,13 +426,13 @@ struct xfs_cil {
426}; 426};
427 427
428/* 428/*
429 * The amount of log space we should the CIL to aggregate is difficult to size. 429 * The amount of log space we allow the CIL to aggregate is difficult to size.
430 * Whatever we chose we have to make we can get a reservation for the log space 430 * Whatever we choose, we have to make sure we can get a reservation for the
431 * effectively, that it is large enough to capture sufficient relogging to 431 * log space effectively, that it is large enough to capture sufficient
432 * reduce log buffer IO significantly, but it is not too large for the log or 432 * relogging to reduce log buffer IO significantly, but it is not too large for
433 * induces too much latency when writing out through the iclogs. We track both 433 * the log or induces too much latency when writing out through the iclogs. We
434 * space consumed and the number of vectors in the checkpoint context, so we 434 * track both space consumed and the number of vectors in the checkpoint
435 * need to decide which to use for limiting. 435 * context, so we need to decide which to use for limiting.
436 * 436 *
437 * Every log buffer we write out during a push needs a header reserved, which 437 * Every log buffer we write out during a push needs a header reserved, which
438 * is at least one sector and more for v2 logs. Hence we need a reservation of 438 * is at least one sector and more for v2 logs. Hence we need a reservation of
@@ -459,16 +459,21 @@ struct xfs_cil {
459 * checkpoint transaction ticket is specific to the checkpoint context, rather 459 * checkpoint transaction ticket is specific to the checkpoint context, rather
460 * than the CIL itself. 460 * than the CIL itself.
461 * 461 *
462 * With dynamic reservations, we can basically make up arbitrary limits for the 462 * With dynamic reservations, we can effectively make up arbitrary limits for
463 * checkpoint size so long as they don't violate any other size rules. Hence 463 * the checkpoint size so long as they don't violate any other size rules.
464 * the initial maximum size for the checkpoint transaction will be set to a 464 * Recovery imposes a rule that no transaction exceed half the log, so we are
465 * quarter of the log or 8MB, which ever is smaller. 8MB is an arbitrary limit 465 * limited by that. Furthermore, the log transaction reservation subsystem
466 * right now based on the latency of writing out a large amount of data through 466 * tries to keep 25% of the log free, so we need to keep below that limit or we
467 * the circular iclog buffers. 467 * risk running out of free log space to start any new transactions.
468 *
469 * In order to keep background CIL push efficient, we will set a lower
470 * threshold at which background pushing is attempted without blocking current
471 * transaction commits. A separate, higher bound defines when CIL pushes are
472 * enforced to ensure we stay within our maximum checkpoint size bounds, yet
473 * still gives us plenty of space for aggregation on large logs.
468 */ 474 */
469 475#define XLOG_CIL_SPACE_LIMIT(log) (log->l_logsize >> 3)
470#define XLOG_CIL_SPACE_LIMIT(log) \ 476#define XLOG_CIL_HARD_SPACE_LIMIT(log) (3 * (log->l_logsize >> 4))
471 (min((log->l_logsize >> 2), (8 * 1024 * 1024)))
472 477
473/* 478/*
474 * The reservation head lsn is not made up of a cycle number and block number. 479 * The reservation head lsn is not made up of a cycle number and block number.